From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:47:24 +0100 Subject: drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG Converting the constants happens at compile time. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba3..d28202811672 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -334,9 +334,7 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3 From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Jul 2011 14:59:37 +0200 Subject: idr: idr_for_each_entry() macro Inspired by the list_for_each_entry() macro --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522e..52a9da295296 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ -- cgit v1.2.3 From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:57:39 +0100 Subject: drbd: Use new header layout The new header layout will only be used if the peer supports it of course. For the first packet and the handshake packet the old (h80) layout is used for compatibility reasons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 82 +++++++++++++++++--------------------- drivers/block/drbd/drbd_receiver.c | 7 +++- include/linux/drbd.h | 2 +- 4 files changed, 42 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dc669dfe5b0d..4de43481bcb9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -345,7 +345,6 @@ struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ - u8 payload[0]; } __packed; struct p_header { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55ce48e24b8e..f8cb15c84ed8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) } #endif +static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); +} + +static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); +} + +static void prepare_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packets cmd, int size) +{ + if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(mdev, &h->h95, cmd, size); + else + prepare_header80(mdev, &h->h80, cmd, size); +} + /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *hg, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { - struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, if (!expect(size)) return false; - h->magic = cpu_to_be32(DRBD_MAGIC); - h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header80)); + prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header80 h; + struct p_header h; int ok; - h.magic = cpu_to_be32(DRBD_MAGIC); - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); + prepare_header(mdev, &h, cmd, size); if (!drbd_get_data_sock(mdev)) return 0; @@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int ok; struct p_block_req p; + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->tconn->data.mutex); ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); @@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(P_DATA); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(P_DATA); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } - + prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(cmd); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } - + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. */ @@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; + /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -3506,12 +3500,8 @@ int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(sizeof(struct p_handshake) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9393fe482efc..8f5a241fe20a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mdev->tconn->agreed_pro_version = 99; + /* agreed_pro_version must be smaller than 100 so we send the old + header (h80) in the first packet and in the handshake packet. */ sock = NULL; msock = NULL; @@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { + if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length); + *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; } else { dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28202811672..35fc08a0a552 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 100 enum drbd_io_error_p { -- cgit v1.2.3 From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c3..4be737055718 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd0..c77e51a40926 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037c..4cceafb0732d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..9f353f7f41ca 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } -- cgit v1.2.3 From 46a15bc3ec425b546d140581c28192ab7877ddc4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:01 +0100 Subject: lru_cache: allow multiple changes per transaction Allow multiple changes to the active set of elements in lru_cache. The only current user of lru_cache, drbd, is driving this generalisation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 50 ++------ drivers/block/drbd/drbd_nl.c | 4 +- include/linux/lru_cache.h | 68 +++++++---- lib/lru_cache.c | 243 +++++++++++++++++++++++++++------------ 4 files changed, 225 insertions(+), 140 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1ce3de6eed1b..44097c87fed7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; int wake; spin_lock_irq(&mdev->al_lock); @@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } @@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); + lc_committed(mdev->act_log); spin_unlock_irq(&mdev->al_lock); wake_up(&mdev->al_wait); } @@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, } ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! */ @@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); } @@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ae8f42e38e4f..0a92f5226c2a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, + n = lc_create("act_log", drbd_al_ext_cache, 1, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), + 1, 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4cceafb0732d..cbafae40c649 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. */ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * - * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. */ -static inline int lc_try_lock(struct lru_cache *lc) +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { - return !test_and_set_bit(__LC_DIRTY, &lc->flags); + return !test_and_set_bit(__LC_LOCKED, &lc->flags); } +/** + * lc_try_lock - variant to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). + */ +extern int lc_try_lock(struct lru_cache *lc); + /** * lc_unlock - unlock @lc, allow lc_get() to change the set again * @lc: the lru cache to operate on */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit_unlock(__LC_DIRTY, &lc->flags); + clear_bit(__LC_DIRTY, &lc->flags); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 17621684758a..d71d89498943 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. */ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. */ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } -/** - * lc_find - find element by label, if present in the hash table - * @lc: The lru_cache object - * @enr: element number - * - * Returns the pointer to an element, if the element with the requested - * "label" or element number is present in the hash table, - * or NULL if not found. Does not change the refcnt. - */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) { struct hlist_node *n; struct lc_element *e; @@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) BUG_ON(!lc); BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) return e; + break; } return NULL; } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); + return __lc_find(lc, enr, 0); +} - list_del(&e->list); - hlist_del(&e->colision); - return e; +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; + + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; + + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); - if (list_empty(&lc->free)) - return lc_evict(lc); + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool if (!may_change) RETURN(NULL); + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } + /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... */ @@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } @@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * pointer to an UNUSED element with some different element number, * where that different number may also be %LC_FREE. * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. * * NOTE: The user needs to check the lc_number on EACH use, so he recognizes * any cache set change. @@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. */ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit_unlock(__LC_DIRTY, &lc->flags); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); -- cgit v1.2.3 From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 409 ++++++++++++++++++++++++--------------- drivers/block/drbd/drbd_int.h | 44 +++-- drivers/block/drbd/drbd_main.c | 4 - drivers/block/drbd/drbd_nl.c | 42 +--- include/linux/drbd.h | 4 + include/linux/drbd_limits.h | 8 +- 6 files changed, 302 insertions(+), 209 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 44097c87fed7..ea3895de4e6d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -24,21 +24,67 @@ */ #include +#include #include +#include +#include #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial checksum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while losing power. - */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. + */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -48,11 +94,8 @@ struct update_odbm_work { struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; struct drbd_atodb_wait { @@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int ok; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); BUG_ON(!bdev->md_bdev); - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? ] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } - } + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); - } - return ok; } @@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. */ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - mdev->al_writ_cnt++; + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + * Optimization potential: + * first check for transaction number > old transaction number, + * so not all waiters have to lock/unlock. */ + wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - spin_lock_irq(&mdev->al_lock); - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. */ + if (al_ext->lc_number != enr) { + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (al_work.err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } @@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct drbd_conf *mdev = w->mdev; - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; + struct al_transaction_on_disk *buffer; + struct lc_element *e; sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; if (!get_ldev(mdev)) { - dev_err(DEV, - "disk is %s, cannot start al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); return 1; } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. - * For now, we must not write the transaction, - * if we cannot write out the bitmap of the evicted extent. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, - "disk is %s, cannot write al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = page_address(mdev->md_io_page); - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - n = lc_index_of(mdev->act_log, updated); + i = 0; + + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. */ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } - xor_sum ^= new_enr; + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); - mx = min_t(int, AL_EXTENTS_PT, + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; + buffer->context[i] = cpu_to_be32(extent_nr); } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - buffer->xor_sum = cpu_to_be32(xor_sum); - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; - - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) - drbd_chk_io_error(mdev, 1, true); + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; + if (drbd_bm_write_hinted(mdev)) + aw->err = -EIO; + /* drbd_chk_io_error done already */ + else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + aw->err = -EIO; + drbd_chk_io_error(mdev, 1, true); + } else { + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; + } mutex_unlock(&mdev->md_io_mutex); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } +/* FIXME + * reading of the activity log, + * and potentially dirtying of the affected bitmap regions, + * should be done from userland only. + * DRBD would simply always attach with an empty activity log, + * and refuse to attach to something that looks like a crashed primary. + */ + /** * drbd_al_read_tr() - Read a single transaction from the on disk activity log * @mdev: DRBD device. @@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused) */ static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - struct al_transaction *b, int index) { + struct al_transaction_on_disk *b = page_address(mdev->md_io_page); sector_t sector; - int rv, i; - u32 xor_sum = 0; + u32 crc; - sector = bdev->md.md_offset + bdev->md.al_offset + index; + sector = bdev->md.md_offset + + bdev->md.al_offset + + index * (MD_BLOCK_SIZE>>9); /* Dont process error normally, * as this is done before disk is attached! */ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); + if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) + return 0; + + if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) + return 0; - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) + return 0; - return rv; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) + return 0; + + crc = be32_to_cpu(b->crc32c); + b->crc32c = 0; + if (!expect(crc == crc32c(0, b, 4096))) + return 0; + + return 1; } /** @@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, */ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - struct al_transaction *buffer; + struct al_transaction_on_disk *b; int i; int rv; int mx; @@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 to_tnr = 0; u32 cnr; - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + /* Note that this is expected to be called with a newly created, + * clean and all unused activity log of the "expected size". + */ /* lock out all other meta data io for now, * and make sure the page is mapped. */ mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + b = page_address(mdev->md_io_page); + + /* Always use the full ringbuffer space for now. + * possible optimization: read in all of it, + * then scan the in-memory pages. */ + + mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + for (i = 0; i < mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, i); + /* invalid data in that block */ if (rv == 0) continue; + + /* IO error */ if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; } - cnr = be32_to_cpu(buffer->tr_number); + cnr = be32_to_cpu(b->tr_number); if (++found_valid == 1) { from = i; to = i; @@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) to_tnr = cnr; continue; } + + D_ASSERT(cnr != to_tnr); + D_ASSERT(cnr != from_tnr); if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); + D_ASSERT(from_tnr - cnr + i - from == mx); from = i; from_tnr = cnr; } @@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; + struct lc_element *e; + unsigned j, n, slot, extent_nr; - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + rv = drbd_al_read_tr(mdev, bdev, i); if (!expect(rv != 0)) goto cancel; if (rv == -1) { @@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return 0; } - trn = be32_to_cpu(buffer->tr_number); + /* deal with different transaction types. + * not yet implemented */ + if (!expect(b->transaction_type == 0)) + goto cancel; - spin_lock_irq(&mdev->al_lock); + /* on the fly re-create/resize activity log? + * will be a special transaction type flag. */ + if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) + goto cancel; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) + goto cancel; - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* We are the only user of the activity log right now, + * don't actually need to take that lock. */ + spin_lock_irq(&mdev->al_lock); - if (extent_nr == LC_FREE) - continue; + /* first, apply the context, ... */ + for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); + j < AL_CONTEXT_PER_TRANSACTION && + slot < mdev->act_log->nr_elements; j++, slot++) { + extent_nr = be32_to_cpu(b->context[j]); + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); + } - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; + /* ... then apply the updates, + * which override the context information. + * drbd_al_read_tr already did the rangecheck + * on n <= AL_UPDATES_PER_TRANSACTION */ + n = be16_to_cpu(b->n_updates); + for (j = 0; j < n; j++) { + slot = be16_to_cpu(b->update_slot_nr[j]); + extent_nr = be32_to_cpu(b->update_extent_nr[j]); + if (!expect(slot < mdev->act_log->nr_elements)) + break; + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); } spin_unlock_irq(&mdev->al_lock); @@ -514,15 +614,12 @@ cancel: if (i == to) break; i++; - if (i > mx) + if (i >= mx) i = 0; } mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index edfdeb62c18f..3213808a898a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1069,7 +1069,6 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev); * either at the end of the backing device * or on a separate meta data device. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) - -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1<ldev); mdev->ldev = NULL;); - if (mdev->md_io_tmpp) { - __free_page(mdev->md_io_tmpp); - mdev->md_io_tmpp = NULL; - } clear_bit(GO_DISKLESS, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0a92f5226c2a..90d731723205 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= 7)) - mdev->sync_conf.al_extents = 127; + if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && mdev->act_log->nr_elements == mdev->sync_conf.al_extents) @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, 1, + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; enum drbd_state_rv rv; int cp_discovered = 0; - int logical_block_size; drbd_reconfig_start(mdev); @@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.rate >= 1)) sc.rate = 1; - if (!expect(sc.al_extents >= 7)) - sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX + + /* clip to allowed range */ + if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) + sc.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) + sc.al_extents = DRBD_AL_EXTENTS_MAX; /* to avoid spurious errors when configuring minors before configuring * the minors they depend on: if necessary, first create the minor we diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 35fc08a0a552..70a688b92c1b 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -336,6 +336,10 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +/* how I came up with this magic? + * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 + /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c36752385..75f05af33725 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 -- cgit v1.2.3 From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 15:38:08 +0100 Subject: drbd: Preparing the connector interface to operator on connections Up to now it only operated on minor numbers. Now it can work also on named connections. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 15 +++++++ drivers/block/drbd/drbd_nl.c | 96 +++++++++++++++++++++++++++--------------- include/linux/drbd.h | 19 +++++++-- 4 files changed, 94 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 48367e53a7a5..033af1995867 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_by_name(const char *name); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cbec5ff2cc74..4761426f9ad7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } +struct drbd_tconn *conn_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + write_lock_irq(&global_state_lock); + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) + goto found; + } + tconn = NULL; +found: + write_unlock_irq(&global_state_lock); + return tconn; +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b141f891f643..27a43d138f6b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2184,42 +2184,57 @@ out: return 0; } +enum cn_handler_type { + CHT_MINOR, + CHT_CONN, + CHT_CTOR, + /* CHT_RES, later */ +}; + struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); + enum cn_handler_type type; + union { + int (*minor_based)(struct drbd_conf *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*conn_based)(struct drbd_tconn *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*constructor)(struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + }; int reply_body_size; }; static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, + [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, + [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, + [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, + [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, + [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, + [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, + [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, + [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, + [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, + [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, + [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, + [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, + [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, + [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, sizeof(struct syncer_conf_tag_len_struct) + sizeof(struct disk_conf_tag_len_struct) + sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, + [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, sizeof(struct get_state_tag_len_struct) + sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, + [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, + [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, + [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) @@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; struct drbd_conf *mdev; + struct drbd_tconn *tconn; int retcode, rr; int reply_size = sizeof(struct cn_msg) + sizeof(struct drbd_nl_cfg_reply) @@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - mdev = ensure_mdev(nlp->drbd_minor, - (nlp->flags & DRBD_NL_CREATE_DEVICE)); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } - if (nlp->packet_type >= P_nl_after_last_packet || nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; @@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms cm = cnd_table + nlp->packet_type; /* This may happen if packet number is 0: */ - if (cm->function == NULL) { + if (cm->minor_based == NULL) { retcode = ERR_PACKET_NR; goto fail; } @@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ - rr = cm->function(mdev, nlp, reply); + retcode = ERR_MINOR_INVALID; + rr = 0; + switch (cm->type) { + case CHT_MINOR: + mdev = minor_to_mdev(nlp->drbd_minor); + if (!mdev) + goto fail; + rr = cm->minor_based(mdev, nlp, reply); + break; + case CHT_CONN: + tconn = conn_by_name(nlp->obj_name); + if (!tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto fail; + } + rr = cm->conn_based(tconn, nlp, reply); + break; + case CHT_CTOR: + rr = cm->constructor(nlp, reply); + break; + /* case CHT_RES: */ + } cn_reply->id = req->id; cn_reply->seq = req->seq; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 70a688b92c1b..7683b4ab6583 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,6 +155,7 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_CONN_NOT_KNOWN = 158, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -347,8 +348,11 @@ enum drbd_timeout_flag { /* Start of the new netlink/connector stuff */ -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 +enum drbd_ncr_flags { + DRBD_NL_CREATE_DEVICE = 0x01, + DRBD_NL_SET_DEFAULTS = 0x02, +}; +#define DRBD_NL_OBJ_NAME_LEN 32 /* For searching a vacant cn_idx value */ @@ -356,8 +360,15 @@ enum drbd_timeout_flag { struct drbd_nl_cfg_req { int packet_type; - unsigned int drbd_minor; - int flags; + union { + struct { + unsigned int drbd_minor; + enum drbd_ncr_flags flags; + }; + struct { + char obj_name[DRBD_NL_OBJ_NAME_LEN]; + }; + }; unsigned short tag_list[]; }; -- cgit v1.2.3 From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 02:07:03 -0500 Subject: drbd: Implemented new commands to create/delete connections/minors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 68 ++++++++++++++------------ drivers/block/drbd/drbd_nl.c | 106 +++++++++++++++++++++++++---------------- include/linux/drbd.h | 3 ++ include/linux/drbd_nl.h | 12 +++++ 5 files changed, 120 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a27e2a4e038d..535d503886d8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); - /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device @@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); -extern struct drbd_conf *drbd_new_device(unsigned int minor); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf *mdev); +extern void drbd_delete_device(unsigned int minor); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2bfd63058f40..ec7d0d98657c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas return thi ? thi->name : task->comm; } -#ifdef CONFIG_SMP int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; - idr_get_next(&tconn->volumes, &minor); + + if (!idr_get_next(&tconn->volumes, &minor)) + return -1; return minor; } + +#ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. */ +void drbd_delete_device(unsigned int minor) { struct drbd_conf *mdev = minor_to_mdev(minor); if (!mdev) return; + idr_remove(&mdev->tconn->volumes, minor); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); - drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) fail: tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn); @@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); idr_destroy(&tconn->volumes); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); @@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn) kfree(tconn); } -struct drbd_conf *drbd_new_device(unsigned int minor) +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; - char conn_name[9]; /* drbd1234N */ - int vnr; + int vnr_got = vnr; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - sprintf(conn_name, "drbd%d", minor); - mdev->tconn = drbd_new_tconn(conn_name); - if (!mdev->tconn) - goto out_no_tconn; - if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) - goto out_no_cpumask; - if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) - goto out_no_cpumask; - if (vnr != 0) { - dev_err(DEV, "vnr = %d\n", vnr); - goto out_no_cpumask; - } - if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; + return ERR_NOMEM; + + mdev->tconn = tconn; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_no_q; + } mdev->minor = minor; @@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor) INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - return mdev; + minor_table[minor] = mdev; + add_disk(disk); + + return NO_ERROR; /* out_whatever_else: kfree(mdev->current_epoch); */ @@ -2367,12 +2377,10 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->tconn->cpu_mask); -out_no_cpumask: - drbd_free_tconn(mdev->tconn); -out_no_tconn: + idr_remove(&tconn->volumes, vnr_got); +out_no_idr: kfree(mdev); - return NULL; + return ERR_NOMEM; } /* counterpart of drbd_new_device. diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 455a51dd364d..f2739fd188a0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) return rv; } -static struct drbd_conf *ensure_mdev(int minor, int create) -{ - struct drbd_conf *mdev; - - if (minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(minor); - - if (!mdev && create) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[minor] == NULL) { - minor_table[minor] = mdev; - disk = mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! */ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(minor); - } - - return mdev; -} - static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) sc.al_extents = DRBD_AL_EXTENTS_MAX; - /* to avoid spurious errors when configuring minors before configuring - * the minors they depend on: if necessary, first create the minor we - * depend on */ - if (sc.after >= 0) - ensure_mdev(sc.after, 1); - /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. */ @@ -2184,13 +2144,73 @@ out: return 0; } +static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_connection args; + + if (!new_connection_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = NO_ERROR; + if (!drbd_new_tconn(args.name)) + reply->ret_code = ERR_NOMEM; + + return 0; +} + +static int drbd_nl_new_minor(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_minor args; + + args.vol_nr = 0; + args.minor = 0; + + if (!new_minor_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); + + return 0; +} + +static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_MINOR_CONFIGURED; + } + return 0; +} + +static int drbd_nl_del_conn(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + if (conn_lowest_minor(tconn) < 0) { + drbd_free_tconn(tconn); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_CONN_IN_USE; + } + + return 0; +} + enum cn_handler_type { CHT_MINOR, CHT_CONN, CHT_CTOR, /* CHT_RES, later */ }; - struct cn_handler_struct { enum cn_handler_type type; union { @@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = { sizeof(struct get_timeout_flag_tag_len_struct)}, [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, + [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, + [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, + [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, + [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7683b4ab6583..e192167e6145 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -156,6 +156,9 @@ enum drbd_ret_code { ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, ERR_CONN_NOT_KNOWN = 158, + ERR_CONN_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf0..1216c7a432c5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26, NL_RESPONSE(return_code_only, 27) #endif +NL_PACKET(new_connection, 28, /* CHT_CTOR */ + NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) +) + +NL_PACKET(new_minor, 29, /* CHT_CONN */ + NL_INTEGER( 86, T_MANDATORY, minor) + NL_INTEGER( 87, T_MANDATORY, vol_nr) +) + +NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ +NL_PACKET(del_connection, 31, ) /* CHT_CONN */ + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 -- cgit v1.2.3 From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af33725..22920a8af4e2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT -- cgit v1.2.3 From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:20:08 +0100 Subject: drbd: prepare the transition from connector to genetlink This adds the new API header and helper files. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 349 +++++++++++++++++++++++++++++++ include/linux/drbd_genl_api.h | 55 +++++ include/linux/genl_magic_func.h | 417 ++++++++++++++++++++++++++++++++++++++ include/linux/genl_magic_struct.h | 260 ++++++++++++++++++++++++ 4 files changed, 1081 insertions(+) create mode 100644 include/linux/drbd_genl.h create mode 100644 include/linux/drbd_genl_api.h create mode 100644 include/linux/genl_magic_func.h create mode 100644 include/linux/genl_magic_struct.h (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 000000000000..84e16848f7a1 --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,349 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. + * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. + * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. + */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. */ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + /* currently only 256 volumes per group, + * but maybe we still change that */ + __u32_field(1, GENLA_F_MANDATORY, ctx_volume) + __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __u64_field(1, GENLA_F_MANDATORY, disk_size) + __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) + __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) + __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) + __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) + __u32_field(7, GENLA_F_MANDATORY, fencing) + __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(11, GENLA_F_MANDATORY, no_md_flush) + __flg_field(12, GENLA_F_MANDATORY, use_bmbv) +) + +GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, + __u32_field(1, GENLA_F_MANDATORY, rate) + __u32_field(2, GENLA_F_MANDATORY, after) + __u32_field(3, GENLA_F_MANDATORY, al_extents) + __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) + __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __flg_field(7, GENLA_F_MANDATORY, use_rle) + __u32_field(8, GENLA_F_MANDATORY, on_no_data) + __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(10, GENLA_F_MANDATORY, c_delay_target) + __u32_field(11, GENLA_F_MANDATORY, c_fill_target) + __u32_field(12, GENLA_F_MANDATORY, c_max_rate) + __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(4, GENLA_F_REQUIRED, my_addr, 128) + __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) + __u32_field(6, GENLA_F_REQUIRED, wire_protocol) + __u32_field(7, GENLA_F_MANDATORY, try_connect_int) + __u32_field(8, GENLA_F_MANDATORY, timeout) + __u32_field(9, GENLA_F_MANDATORY, ping_int) + __u32_field(10, GENLA_F_MANDATORY, ping_timeo) + __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(13, GENLA_F_MANDATORY, ko_count) + __u32_field(14, GENLA_F_MANDATORY, max_buffers) + __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(20, GENLA_F_MANDATORY, rr_conflict) + __u32_field(21, GENLA_F_MANDATORY, on_congestion) + __u32_field(22, GENLA_F_MANDATORY, cong_fill) + __u32_field(23, GENLA_F_MANDATORY, cong_extents) + __flg_field(24, GENLA_F_MANDATORY, two_primaries) + __flg_field(25, GENLA_F_MANDATORY, want_lose) + __flg_field(26, GENLA_F_MANDATORY, no_cork) + __flg_field(27, GENLA_F_MANDATORY, always_asbp) + __flg_field(28, GENLA_F_MANDATORY, dry_run) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, GENLA_F_MANDATORY, resize_size) + __flg_field(2, GENLA_F_MANDATORY, resize_force) + __flg_field(3, GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this is an event triggered broadcast. */ + __u32_field(1, GENLA_F_MANDATORY, sib_reason) + __u32_field(2, GENLA_F_REQUIRED, current_state) + __u64_field(3, GENLA_F_MANDATORY, capacity) + __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. + * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, GENLA_F_MANDATORY, prev_state) + __u32_field(6, GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, GENLA_F_MANDATORY, disk_flags) + __u64_field(9, GENLA_F_MANDATORY, bits_total) + __u64_field(10, GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, GENLA_F_MANDATORY, helper, 32) + __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, GENLA_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, GENLA_F_MANDATORY, force_disconnect) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) +) + +#if 0 + /* TO BE DONE */ + /* create or destroy resources, aka replication groups */ +GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +#endif + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* add or delete replication links to resources */ +GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on replication links */ +GENL_op(DRBD_ADM_SYNCER, 9, + GENL_doit(drbd_adm_syncer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on minors */ +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) +) + + /* operates on all volumes within a resource */ +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 000000000000..9ef50d51e34e --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). + * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 000000000000..8a86f659d363 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,417 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Extension of genl attribute validation policies {{{1 + * {{{2 + */ + +/** + * nla_is_required - return true if this attribute is required + * @nla: netlink attribute + */ +static inline int nla_is_required(const struct nlattr *nla) +{ + return nla->nla_type & GENLA_F_REQUIRED; +} + +/** + * nla_is_mandatory - return true if understanding this attribute is mandatory + * @nla: netlink attribute + * Note: REQUIRED attributes are implicitly MANDATORY as well + */ +static inline int nla_is_mandatory(const struct nlattr *nla) +{ + return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); +} + +/* Functionality to be integrated into nla_parse(), and validate_nla(), + * respectively. + * + * Enforcing the "mandatory" bit is done here, + * by rejecting unknown mandatory attributes. + * + * Part of enforcing the "required" flag would mean to embed it into + * nla_policy.type, and extending validate_nla(), which currently does + * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, + * so we cannot do that. Thats why enforcing "required" is done in the + * generated assignment functions below. */ +static int nla_check_unknown(int maxtype, struct nlattr *head, int len) +{ + struct nlattr *nla; + int rem; + nla_for_each_attr(nla, head, len, rem) { + __u16 type = nla_type(nla); + if (type > maxtype && nla_is_mandatory(nla)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#if 1 +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? *(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + /* static, potentially unused */ \ +int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = tb[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +/* validate_nla() already checked nla_len <= maxlen appropriately. */ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } \ + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. + * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 000000000000..745ebfd6c7e5 --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,260 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/** + * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement + * + * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. + * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. + * + * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. + * Without this, unknown attributes (> maxtype) are _silently_ ignored + * by validate_nla(). + * + * To be used for API extensions, so older kernel can reject requests for not + * yet implemented features, if newer userland tries to use them even though + * the genl_family version clearly indicates they are not available. + * + * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. + * To be used for API extensions for things that have sane defaults, + * so newer userland can still talk to older kernel, knowing it will + * silently ignore these attributes if not yet known. + * + * NOTE: These flags overload + * NLA_F_NESTED (1 << 15) + * NLA_F_NET_BYTEORDER (1 << 14) + * from linux/netlink.h, which are not useful for validate_nla(): + * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting + * .type = NLA_NESTED in the appropriate policy. + * + * See also: nla_type() + */ +enum { + GENLA_F_MAY_IGNORE = 0, + GENLA_F_MANDATORY = 1 << 14, + GENLA_F_REQUIRED = 1 << 15, + + /* This will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb, to exclude "sensitive" + * information from broadcasts, or on unpriviledged get requests. + * This is useful because genetlink multicast groups can be listened in + * on by anyone. */ + GENLA_F_SENSITIVE = 1 << 16, +}; + +#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ + nla_get_flag, __nla_put_flag) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT) + +#define __nla_put_flag(skb, attrtype, value) \ + do { \ + if (value) \ + NLA_PUT_FLAG(skb, attrtype); \ + } while (0) + +#define GENL_op_init(args...) args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From d4f65b5d2497b2fd9c45f06b71deb4ab084a5b66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Sep 2012 13:06:29 +0100 Subject: KEYS: Add payload preparsing opportunity prior to key instantiate or update Give the key type the opportunity to preparse the payload prior to the instantiation and update routines being called. This is done with the provision of two new key type operations: int (*preparse)(struct key_preparsed_payload *prep); void (*free_preparse)(struct key_preparsed_payload *prep); If the first operation is present, then it is called before key creation (in the add/update case) or before the key semaphore is taken (in the update and instantiate cases). The second operation is called to clean up if the first was called. preparse() is given the opportunity to fill in the following structure: struct key_preparsed_payload { char *description; void *type_data[2]; void *payload; const void *data; size_t datalen; size_t quotalen; }; Before the preparser is called, the first three fields will have been cleared, the payload pointer and size will be stored in data and datalen and the default quota size from the key_type struct will be stored into quotalen. The preparser may parse the payload in any way it likes and may store data in the type_data[] and payload fields for use by the instantiate() and update() ops. The preparser may also propose a description for the key by attaching it as a string to the description field. This can be used by passing a NULL or "" description to the add_key() system call or the key_create_or_update() function. This cannot work with request_key() as that required the description to tell the upcall about the key to be created. This, for example permits keys that store PGP public keys to generate their own name from the user ID and public key fingerprint in the key. The instantiate() and update() operations are then modified to look like this: int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); int (*update)(struct key *key, struct key_preparsed_payload *prep); and the new payload data is passed in *prep, whether or not it was preparsed. Signed-off-by: David Howells --- Documentation/security/keys.txt | 50 +++++++++++++- fs/cifs/cifs_spnego.c | 6 +- fs/cifs/cifsacl.c | 8 +-- include/keys/user-type.h | 6 +- include/linux/key-type.h | 35 +++++++++- net/ceph/crypto.c | 9 +-- net/dns_resolver/dns_key.c | 6 +- net/rxrpc/ar-key.c | 40 +++++------ security/keys/encrypted-keys/encrypted.c | 16 +++-- security/keys/key.c | 114 ++++++++++++++++++++++--------- security/keys/keyctl.c | 18 +++-- security/keys/keyring.c | 6 +- security/keys/request_key_auth.c | 8 +-- security/keys/trusted.c | 16 +++-- security/keys/user_defined.c | 14 ++-- 15 files changed, 250 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index aa0dbd74b71b..7d9ca92022d8 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -412,6 +412,10 @@ The main syscalls are: to the keyring. In this case, an error will be generated if the process does not have permission to write to the keyring. + If the key type supports it, if the description is NULL or an empty + string, the key type will try and generate a description from the content + of the payload. + The payload is optional, and the pointer can be NULL if not required by the type. The payload is plen in size, and plen can be zero for an empty payload. @@ -1114,12 +1118,53 @@ The structure has a number of fields, some of which are mandatory: it should return 0. - (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); + (*) int (*preparse)(struct key_preparsed_payload *prep); + + This optional method permits the key type to attempt to parse payload + before a key is created (add key) or the key semaphore is taken (update or + instantiate key). The structure pointed to by prep looks like: + + struct key_preparsed_payload { + char *description; + void *type_data[2]; + void *payload; + const void *data; + size_t datalen; + size_t quotalen; + }; + + Before calling the method, the caller will fill in data and datalen with + the payload blob parameters; quotalen will be filled in with the default + quota size from the key type and the rest will be cleared. + + If a description can be proposed from the payload contents, that should be + attached as a string to the description field. This will be used for the + key description if the caller of add_key() passes NULL or "". + + The method can attach anything it likes to type_data[] and payload. These + are merely passed along to the instantiate() or update() operations. + + The method should return 0 if success ful or a negative error code + otherwise. + + + (*) void (*free_preparse)(struct key_preparsed_payload *prep); + + This method is only required if the preparse() method is provided, + otherwise it is unused. It cleans up anything attached to the + description, type_data and payload fields of the key_preparsed_payload + struct as filled in by the preparse() method. + + + (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); This method is called to attach a payload to a key during construction. The payload attached need not bear any relation to the data passed to this function. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + If the amount of data attached to the key differs from the size in keytype->def_datalen, then key_payload_reserve() should be called. @@ -1135,6 +1180,9 @@ The structure has a number of fields, some of which are mandatory: If this type of key can be updated, then this method should be provided. It is called to update a key's payload from the blob of data provided. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + key_payload_reserve() should be called if the data length might change before any changes are actually made. Note that if this succeeds, the type is committed to changing the key because it's already been altered, so all diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index e622863b292f..086f381d6489 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -31,18 +31,18 @@ /* create a new cifs key */ static int -cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; int ret; ret = -ENOMEM; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) goto error; /* attach the data */ - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; ret = 0; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..f3c60e264ca8 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = { }; static int -cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) return -ENOMEM; - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; - key->datalen = datalen; + key->datalen = prep->datalen; return 0; } diff --git a/include/keys/user-type.h b/include/keys/user-type.h index bc9ec1d7698c..5e452c84f1e6 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -35,8 +35,10 @@ struct user_key_payload { extern struct key_type key_type_user; extern struct key_type key_type_logon; -extern int user_instantiate(struct key *key, const void *data, size_t datalen); -extern int user_update(struct key *key, const void *data, size_t datalen); +struct key_preparsed_payload; + +extern int user_instantiate(struct key *key, struct key_preparsed_payload *prep); +extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern int user_match(const struct key *key, const void *criterion); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); diff --git a/include/linux/key-type.h b/include/linux/key-type.h index f0c651cda7b0..518a53afb9ea 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -26,6 +26,27 @@ struct key_construction { struct key *authkey;/* authorisation for key being constructed */ }; +/* + * Pre-parsed payload, used by key add, update and instantiate. + * + * This struct will be cleared and data and datalen will be set with the data + * and length parameters from the caller and quotalen will be set from + * def_datalen from the key type. Then if the preparse() op is provided by the + * key type, that will be called. Then the struct will be passed to the + * instantiate() or the update() op. + * + * If the preparse() op is given, the free_preparse() op will be called to + * clear the contents. + */ +struct key_preparsed_payload { + char *description; /* Proposed key description (or NULL) */ + void *type_data[2]; /* Private key-type data */ + void *payload; /* Proposed payload */ + const void *data; /* Raw data */ + size_t datalen; /* Raw datalen */ + size_t quotalen; /* Quota length for proposed payload */ +}; + typedef int (*request_key_actor_t)(struct key_construction *key, const char *op, void *aux); @@ -45,18 +66,28 @@ struct key_type { /* vet a description */ int (*vet_description)(const char *description); + /* Preparse the data blob from userspace that is to be the payload, + * generating a proposed description and payload that will be handed to + * the instantiate() and update() ops. + */ + int (*preparse)(struct key_preparsed_payload *prep); + + /* Free a preparse data structure. + */ + void (*free_preparse)(struct key_preparsed_payload *prep); + /* instantiate a key of this type * - this method should call key_payload_reserve() to determine if the * user's quota will hold the payload */ - int (*instantiate)(struct key *key, const void *data, size_t datalen); + int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); /* update a key of this type (optional) * - this method should call key_payload_reserve() to recalculate the * quota consumption * - the key must be locked against read when modifying */ - int (*update)(struct key *key, const void *data, size_t datalen); + int (*update)(struct key *key, struct key_preparsed_payload *prep); /* match a key against a description */ int (*match)(const struct key *key, const void *desc); diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 9da7fdd3cd8a..af14cb425164 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; + size_t datalen = prep->datalen; int ret; void *p; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; ret = key_payload_reserve(key, datalen); @@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void *)data; - ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + p = (void *)prep->data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..859ab8b6ec34 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache; * "ip1,ip2,...#foo=bar" */ static int -dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; - size_t result_len = 0; - const char *data = _data, *end, *opt; + size_t datalen = prep->datalen, result_len = 0; + const char *data = prep->data, *end, *opt; kenter("%%%d,%s,'%*.*s',%zu", key->serial, key->description, diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 8b1f9f49960f..106c5a6b1ab2 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -26,8 +26,8 @@ #include "ar-internal.h" static int rxrpc_vet_description_s(const char *); -static int rxrpc_instantiate(struct key *, const void *, size_t); -static int rxrpc_instantiate_s(struct key *, const void *, size_t); +static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *); +static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); @@ -678,7 +678,7 @@ error: * * if no data is provided, then a no-security key is made */ -static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) +static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; @@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) u32 kver; int ret; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); /* handle a no-security key */ - if (!data && datalen == 0) + if (!prep->data && prep->datalen == 0) return 0; /* determine if the XDR payload format is being used */ - if (datalen > 7 * 4) { - ret = rxrpc_instantiate_xdr(key, data, datalen); + if (prep->datalen > 7 * 4) { + ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen); if (ret != -EPROTO) return ret; } /* get the key interface version number */ ret = -EINVAL; - if (datalen <= 4 || !data) + if (prep->datalen <= 4 || !prep->data) goto error; - memcpy(&kver, data, sizeof(kver)); - data += sizeof(kver); - datalen -= sizeof(kver); + memcpy(&kver, prep->data, sizeof(kver)); + prep->data += sizeof(kver); + prep->datalen -= sizeof(kver); _debug("KEY I/F VERSION: %u", kver); @@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) /* deal with a version 1 key */ ret = -EINVAL; - if (datalen < sizeof(*v1)) + if (prep->datalen < sizeof(*v1)) goto error; - v1 = data; - if (datalen != sizeof(*v1) + v1->ticket_length) + v1 = prep->data; + if (prep->datalen != sizeof(*v1) + v1->ticket_length) goto error; _debug("SCIX: %u", v1->security_index); @@ -784,17 +784,17 @@ error: * instantiate a server secret key * data should be a pointer to the 8-byte secret key */ -static int rxrpc_instantiate_s(struct key *key, const void *data, - size_t datalen) +static int rxrpc_instantiate_s(struct key *key, + struct key_preparsed_payload *prep) { struct crypto_blkcipher *ci; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); - if (datalen != 8) + if (prep->datalen != 8) return -EINVAL; - memcpy(&key->type_data, data, 8); + memcpy(&key->type_data, prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data, return PTR_ERR(ci); } - if (crypto_blkcipher_setkey(ci, data, 8) < 0) + if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); key->payload.data = ci; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 2d1bb8af7696..9e1e005c7596 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -773,8 +773,8 @@ static int encrypted_init(struct encrypted_key_payload *epayload, * * On success, return 0. Otherwise return errno. */ -static int encrypted_instantiate(struct key *key, const void *data, - size_t datalen) +static int encrypted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; @@ -782,16 +782,17 @@ static int encrypted_instantiate(struct key *key, const void *data, char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; + size_t datalen = prep->datalen; int ret; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; datablob[datalen] = 0; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, &decrypted_datalen, &hex_encoded_iv); if (ret < 0) @@ -834,16 +835,17 @@ static void encrypted_rcu_free(struct rcu_head *rcu) * * On success, return 0. Otherwise return errno. */ -static int encrypted_update(struct key *key, const void *data, size_t datalen) +static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = key->payload.data; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; const char *format = NULL; + size_t datalen = prep->datalen; int ret = 0; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; buf = kmalloc(datalen + 1, GFP_KERNEL); @@ -851,7 +853,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) return -ENOMEM; buf[datalen] = 0; - memcpy(buf, data, datalen); + memcpy(buf, prep->data, datalen); ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL); if (ret < 0) goto out; diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..1d039af99f50 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -412,8 +412,7 @@ EXPORT_SYMBOL(key_payload_reserve); * key_construction_mutex. */ static int __key_instantiate_and_link(struct key *key, - const void *data, - size_t datalen, + struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, unsigned long *_prealloc) @@ -431,7 +430,7 @@ static int __key_instantiate_and_link(struct key *key, /* can't instantiate twice */ if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* instantiate the key */ - ret = key->type->instantiate(key, data, datalen); + ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ @@ -482,22 +481,37 @@ int key_instantiate_and_link(struct key *key, struct key *keyring, struct key *authkey) { + struct key_preparsed_payload prep; unsigned long prealloc; int ret; + memset(&prep, 0, sizeof(prep)); + prep.data = data; + prep.datalen = datalen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; + } + if (keyring) { ret = __key_link_begin(keyring, key->type, key->description, &prealloc); if (ret < 0) - return ret; + goto error_free_preparse; } - ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey, + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &prealloc); if (keyring) __key_link_end(keyring, key->type, prealloc); +error_free_preparse: + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } @@ -706,7 +720,7 @@ void key_type_put(struct key_type *ktype) * if we get an error. */ static inline key_ref_t __key_update(key_ref_t key_ref, - const void *payload, size_t plen) + struct key_preparsed_payload *prep) { struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -722,7 +736,7 @@ static inline key_ref_t __key_update(key_ref_t key_ref, down_write(&key->sem); - ret = key->type->update(key, payload, plen); + ret = key->type->update(key, prep); if (ret == 0) /* updating a negative key instantiates it */ clear_bit(KEY_FLAG_NEGATIVE, &key->flags); @@ -774,6 +788,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, unsigned long flags) { unsigned long prealloc; + struct key_preparsed_payload prep; const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; @@ -789,8 +804,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } key_ref = ERR_PTR(-EINVAL); - if (!ktype->match || !ktype->instantiate) - goto error_2; + if (!ktype->match || !ktype->instantiate || + (!description && !ktype->preparse)) + goto error_put_type; keyring = key_ref_to_ptr(keyring_ref); @@ -798,18 +814,37 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) - goto error_2; + goto error_put_type; + + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = ktype->def_datalen; + if (ktype->preparse) { + ret = ktype->preparse(&prep); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_put_type; + } + if (!description) + description = prep.description; + key_ref = ERR_PTR(-EINVAL); + if (!description) + goto error_free_prep; + } ret = __key_link_begin(keyring, ktype, description, &prealloc); - if (ret < 0) - goto error_2; + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; + } /* if we're going to allocate a new key, we're going to have * to modify the keyring */ ret = key_permission(keyring_ref, KEY_WRITE); if (ret < 0) { key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } /* if it's possible to update this type of key, search for an existing @@ -840,25 +875,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); - goto error_3; + goto error_link_end; } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); - error_3: +error_link_end: __key_link_end(keyring, ktype, prealloc); - error_2: +error_free_prep: + if (ktype->preparse) + ktype->free_preparse(&prep); +error_put_type: key_type_put(ktype); - error: +error: return key_ref; found_matching_key: @@ -866,10 +903,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * - we can drop the locks first as we have the key pinned */ __key_link_end(keyring, ktype, prealloc); - key_type_put(ktype); - key_ref = __key_update(key_ref, payload, plen); - goto error; + key_ref = __key_update(key_ref, &prep); + goto error_free_prep; } EXPORT_SYMBOL(key_create_or_update); @@ -888,6 +924,7 @@ EXPORT_SYMBOL(key_create_or_update); */ int key_update(key_ref_t key_ref, const void *payload, size_t plen) { + struct key_preparsed_payload prep; struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -900,18 +937,31 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) /* attempt to update it if supported */ ret = -EOPNOTSUPP; - if (key->type->update) { - down_write(&key->sem); - - ret = key->type->update(key, payload, plen); - if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + if (!key->type->update) + goto error; - up_write(&key->sem); + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; } - error: + down_write(&key->sem); + + ret = key->type->update(key, &prep); + if (ret == 0) + /* updating a negative key instantiates it */ + clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + + up_write(&key->sem); + + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } EXPORT_SYMBOL(key_update); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf46807..505d40be196c 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -46,6 +46,9 @@ static int key_get_type_from_user(char *type, * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * + * If the description is NULL or an empty string, the key type is asked to + * generate one from the payload. + * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error @@ -72,10 +75,17 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, if (ret < 0) goto error; - description = strndup_user(_description, PAGE_SIZE); - if (IS_ERR(description)) { - ret = PTR_ERR(description); - goto error; + description = NULL; + if (_description) { + description = strndup_user(_description, PAGE_SIZE); + if (IS_ERR(description)) { + ret = PTR_ERR(description); + goto error; + } + if (!*description) { + kfree(description); + description = NULL; + } } /* pull the payload in if one was supplied */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..f04d8cf81f3c 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -66,7 +66,7 @@ static inline unsigned keyring_hash(const char *desc) * operations. */ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen); + struct key_preparsed_payload *prep); static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); @@ -121,12 +121,12 @@ static void keyring_publish_name(struct key *keyring) * Returns 0 on success, -EINVAL if given any data. */ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen) + struct key_preparsed_payload *prep) { int ret; ret = -EINVAL; - if (datalen == 0) { + if (prep->datalen == 0) { /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 60d4e3f5e4bb..85730d5a5a59 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -19,7 +19,8 @@ #include #include "internal.h" -static int request_key_auth_instantiate(struct key *, const void *, size_t); +static int request_key_auth_instantiate(struct key *, + struct key_preparsed_payload *); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); @@ -42,10 +43,9 @@ struct key_type key_type_request_key_auth = { * Instantiate a request-key authorisation key. */ static int request_key_auth_instantiate(struct key *key, - const void *data, - size_t datalen) + struct key_preparsed_payload *prep) { - key->payload.data = (struct request_key_auth *) data; + key->payload.data = (struct request_key_auth *)prep->data; return 0; } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 2d5d041f2049..42036c7a0856 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -927,22 +927,23 @@ static struct trusted_key_payload *trusted_payload_alloc(struct key *key) * * On success, return 0. Otherwise return errno. */ -static int trusted_instantiate(struct key *key, const void *data, - size_t datalen) +static int trusted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct trusted_key_payload *payload = NULL; struct trusted_key_options *options = NULL; + size_t datalen = prep->datalen; char *datablob; int ret = 0; int key_cmd; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; options = trusted_options_alloc(); @@ -1011,17 +1012,18 @@ static void trusted_rcu_free(struct rcu_head *rcu) /* * trusted_update - reseal an existing key with new PCR values */ -static int trusted_update(struct key *key, const void *data, size_t datalen) +static int trusted_update(struct key *key, struct key_preparsed_payload *prep) { struct trusted_key_payload *p = key->payload.data; struct trusted_key_payload *new_p; struct trusted_key_options *new_o; + size_t datalen = prep->datalen; char *datablob; int ret = 0; if (!p->migratable) return -EPERM; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); @@ -1038,7 +1040,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen) goto out; } - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; ret = datablob_parse(datablob, new_p, new_o); if (ret != Opt_update) { diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c7660a25a3e4..55dc88939185 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -58,13 +58,14 @@ EXPORT_SYMBOL_GPL(key_type_logon); /* * instantiate a user defined key */ -int user_instantiate(struct key *key, const void *data, size_t datalen) +int user_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; ret = key_payload_reserve(key, datalen); @@ -78,7 +79,7 @@ int user_instantiate(struct key *key, const void *data, size_t datalen) /* attach the data */ upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); rcu_assign_keypointer(key, upayload); ret = 0; @@ -92,13 +93,14 @@ EXPORT_SYMBOL_GPL(user_instantiate); * update a user defined key * - the key's semaphore is write-locked */ -int user_update(struct key *key, const void *data, size_t datalen) +int user_update(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload, *zap; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; /* construct a replacement payload */ @@ -108,7 +110,7 @@ int user_update(struct key *key, const void *data, size_t datalen) goto error; upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); /* check the quota and attach the new data */ zap = upayload; -- cgit v1.2.3 From e6459606b04e6385ccd3c2060fc10f78a92c7700 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 Oct 2012 00:23:52 +0200 Subject: lib: Add early cpio decoder Add a simple cpio decoder without library dependencies for the purpose of extracting components from the initramfs blob for early kernel uses. Intended consumers so far are microcode and ACPI override. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1349043837-22659-2-git-send-email-trenn@suse.de Cc: Len Brown Cc: Fenghua Yu Signed-off-by: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/earlycpio.h | 17 ++++++ lib/Makefile | 2 +- lib/earlycpio.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 include/linux/earlycpio.h create mode 100644 lib/earlycpio.c (limited to 'include/linux') diff --git a/include/linux/earlycpio.h b/include/linux/earlycpio.h new file mode 100644 index 000000000000..111f46d83d00 --- /dev/null +++ b/include/linux/earlycpio.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_EARLYCPIO_H +#define _LINUX_EARLYCPIO_H + +#include + +#define MAX_CPIO_FILE_NAME 18 + +struct cpio_data { + void *data; + size_t size; + char name[MAX_CPIO_FILE_NAME]; +}; + +struct cpio_data find_cpio_data(const char *path, void *data, size_t len, + long *offset); + +#endif /* _LINUX_EARLYCPIO_H */ diff --git a/lib/Makefile b/lib/Makefile index 42d283edc4d3..0924041b6959 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o earlycpio.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/earlycpio.c b/lib/earlycpio.c new file mode 100644 index 000000000000..8078ef49cb79 --- /dev/null +++ b/lib/earlycpio.c @@ -0,0 +1,145 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2012 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available + * under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ----------------------------------------------------------------------- */ + +/* + * earlycpio.c + * + * Find a specific cpio member; must precede any compressed content. + * This is used to locate data items in the initramfs used by the + * kernel itself during early boot (before the main initramfs is + * decompressed.) It is the responsibility of the initramfs creator + * to ensure that these items are uncompressed at the head of the + * blob. Depending on the boot loader or package tool that may be a + * separate file or part of the same file. + */ + +#include +#include +#include + +enum cpio_fields { + C_MAGIC, + C_INO, + C_MODE, + C_UID, + C_GID, + C_NLINK, + C_MTIME, + C_FILESIZE, + C_MAJ, + C_MIN, + C_RMAJ, + C_RMIN, + C_NAMESIZE, + C_CHKSUM, + C_NFIELDS +}; + +/** + * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @offset: When a matching file is found, this is the offset to the + * beginning of the cpio. It can be used to iterate through + * the cpio to find all files inside of a directory path + * + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. + * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. + */ + +struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, + size_t len, long *offset) +{ + const size_t cpio_header_len = 8*C_NFIELDS - 2; + struct cpio_data cd = { NULL, 0, "" }; + const char *p, *dptr, *nptr; + unsigned int ch[C_NFIELDS], *chp, v; + unsigned char c, x; + size_t mypathsize = strlen(path); + int i, j; + + p = data; + + while (len > cpio_header_len) { + if (!*p) { + /* All cpio headers need to be 4-byte aligned */ + p += 4; + len -= 4; + continue; + } + + j = 6; /* The magic field is only 6 characters */ + chp = ch; + for (i = C_NFIELDS; i; i--) { + v = 0; + while (j--) { + v <<= 4; + c = *p++; + + x = c - '0'; + if (x < 10) { + v += x; + continue; + } + + x = (c | 0x20) - 'a'; + if (x < 6) { + v += x + 10; + continue; + } + + goto quit; /* Invalid hexadecimal */ + } + *chp++ = v; + j = 8; /* All other fields are 8 characters */ + } + + if ((ch[C_MAGIC] - 0x070701) > 1) + goto quit; /* Invalid magic */ + + len -= cpio_header_len; + + dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4); + nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4); + + if (nptr > p + len || dptr < p || nptr < dptr) + goto quit; /* Buffer overrun */ + + if ((ch[C_MODE] & 0170000) == 0100000 && + ch[C_NAMESIZE] >= mypathsize && + !memcmp(p, path, mypathsize)) { + *offset = (long)nptr - (long)data; + if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { + pr_warn( + "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", + p, MAX_CPIO_FILE_NAME); + } + strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); + + cd.data = (void *)dptr; + cd.size = ch[C_FILESIZE]; + return cd; /* Found it! */ + } + len -= (nptr - p); + p = nptr; + } + +quit: + return cd; +} -- cgit v1.2.3 From 8e30524dcc0d0ac1a18a5cee482b9d9cde3cb332 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:53 +0200 Subject: x86, acpi: Introduce x86 arch specific arch_reserve_mem_area() for e820 handling This is needed for ACPI table overriding via initrd. Beside reserving memblocks, X86 also requires to flag the memory area to E820_RESERVED or E820_ACPI in the e820 mappings to be able to io(re)map it later. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-3-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/boot.c | 6 ++++++ include/linux/acpi.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6ed..6b75777c0a8d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1700,3 +1700,9 @@ int __acpi_release_global_lock(unsigned int *lock) } while (unlikely (val != old)); return old & 0x1; } + +void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + e820_add_region(addr, size, E820_ACPI); + update_e820(); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f2a76224509..946fd1ea79ff 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -426,6 +426,14 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); +#if CONFIG_X86 +void arch_reserve_mem_area(acpi_physical_address addr, size_t size); +#else +static inline void arch_reserve_mem_area(acpi_physical_address addr, + size_t size) +{ +} +#endif /* CONFIG_X86 */ #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif -- cgit v1.2.3 From 53aac44c904abbad9f474f652f099de13b5c3563 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:54 +0200 Subject: ACPI: Store valid ACPI tables passed via early initrd in reserved memblock areas A later patch will compare them with ACPI tables that get loaded at boot or runtime and if criteria match, a stored one is loaded. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-4-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 2 + drivers/acpi/Kconfig | 9 ++++ drivers/acpi/osl.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 8 ++++ 4 files changed, 141 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b95..764e543b2297 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -941,6 +941,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); + acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); + reserve_crashkernel(); vsmp_init(); diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 80998958cf45..8cf719547b51 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -261,6 +261,15 @@ config ACPI_CUSTOM_DSDT bool default ACPI_CUSTOM_DSDT_FILE != "" +config ACPI_INITRD_TABLE_OVERRIDE + bool "ACPI tables can be passed via uncompressed cpio in initrd" + default n + help + This option provides functionality to override arbitrary ACPI tables + via initrd. No functional change if no ACPI tables are passed via + initrd, therefore it's safe to say Y. + See Documentation/acpi/initrd_table_override.txt for details + config ACPI_BLACKLIST_YEAR int "Disable ACPI for systems before Jan 1st this year" if X86_32 default 0 diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 9eaf708f5885..b20b07903218 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -534,6 +534,128 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, return AE_OK; } +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +#include +#include + +static u64 acpi_tables_addr; +static int all_tables_size; + +/* Copied from acpica/tbutils.c:acpi_tb_checksum() */ +u8 __init acpi_table_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + return sum; +} + +/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ +static const char * const table_sigs[] = { + ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, + ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, + ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF, + ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET, + ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI, + ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA, + ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, + ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, + ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL }; + +/* Non-fatal errors: Affected tables/files are ignored */ +#define INVALID_TABLE(x, path, name) \ + { pr_err("ACPI OVERRIDE: " x " [%s%s]\n", path, name); continue; } + +#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) + +/* Must not increase 10 or needs code modification below */ +#define ACPI_OVERRIDE_TABLES 10 + +void __init acpi_initrd_override(void *data, size_t size) +{ + int sig, no, table_nr = 0, total_offset = 0; + long offset = 0; + struct acpi_table_header *table; + char cpio_path[32] = "kernel/firmware/acpi/"; + struct cpio_data file; + struct cpio_data early_initrd_files[ACPI_OVERRIDE_TABLES]; + char *p; + + if (data == NULL || size == 0) + return; + + for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) { + file = find_cpio_data(cpio_path, data, size, &offset); + if (!file.data) + break; + + data += offset; + size -= offset; + + if (file.size < sizeof(struct acpi_table_header)) + INVALID_TABLE("Table smaller than ACPI header", + cpio_path, file.name); + + table = file.data; + + for (sig = 0; table_sigs[sig]; sig++) + if (!memcmp(table->signature, table_sigs[sig], 4)) + break; + + if (!table_sigs[sig]) + INVALID_TABLE("Unknown signature", + cpio_path, file.name); + if (file.size != table->length) + INVALID_TABLE("File length does not match table length", + cpio_path, file.name); + if (acpi_table_checksum(file.data, table->length)) + INVALID_TABLE("Bad table checksum", + cpio_path, file.name); + + pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n", + table->signature, cpio_path, file.name, table->length); + + all_tables_size += table->length; + early_initrd_files[table_nr].data = file.data; + early_initrd_files[table_nr].size = file.size; + table_nr++; + } + if (table_nr == 0) + return; + + acpi_tables_addr = + memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, + all_tables_size, PAGE_SIZE); + if (!acpi_tables_addr) { + WARN_ON(1); + return; + } + /* + * Only calling e820_add_reserve does not work and the + * tables are invalid (memory got used) later. + * memblock_reserve works as expected and the tables won't get modified. + * But it's not enough on X86 because ioremap will + * complain later (used by acpi_os_map_memory) that the pages + * that should get mapped are not marked "reserved". + * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area) + * works fine. + */ + memblock_reserve(acpi_tables_addr, acpi_tables_addr + all_tables_size); + arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + + p = early_ioremap(acpi_tables_addr, all_tables_size); + + for (no = 0; no < table_nr; no++) { + memcpy(p + total_offset, early_initrd_files[no].data, + early_initrd_files[no].size); + total_offset += early_initrd_files[no].size; + } + early_iounmap(p, all_tables_size); +} +#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ + acpi_status acpi_os_table_override(struct acpi_table_header * existing_table, struct acpi_table_header ** new_table) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 946fd1ea79ff..d14081c10c17 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -76,6 +76,14 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +void acpi_initrd_override(void *data, size_t size); +#else +static inline void acpi_initrd_override(void *data, size_t size) +{ +} +#endif + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); -- cgit v1.2.3 From 3a50597de8635cd05133bd12c95681c82fe7b878 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:29 +0100 Subject: KEYS: Make the session and process keyrings per-thread Make the session keyring per-thread rather than per-process, but still inherited from the parent thread to solve a problem with PAM and gdm. The problem is that join_session_keyring() will reject attempts to change the session keyring of a multithreaded program but gdm is now multithreaded before it gets to the point of starting PAM and running pam_keyinit to create the session keyring. See: https://bugs.freedesktop.org/show_bug.cgi?id=49211 The reason that join_session_keyring() will only change the session keyring under a single-threaded environment is that it's hard to alter the other thread's credentials to effect the change in a multi-threaded program. The problems are such as: (1) How to prevent two threads both running join_session_keyring() from racing. (2) Another thread's credentials may not be modified directly by this process. (3) The number of threads is uncertain whilst we're not holding the appropriate spinlock, making preallocation slightly tricky. (4) We could use TIF_NOTIFY_RESUME and key_replace_session_keyring() to get another thread to replace its keyring, but that means preallocating for each thread. A reasonable way around this is to make the session keyring per-thread rather than per-process and just document that if you want a common session keyring, you must get it before you spawn any threads - which is the current situation anyway. Whilst we're at it, we can the process keyring behave in the same way. This means we can clean up some of the ickyness in the creds code. Basically, after this patch, the session, process and thread keyrings are about inheritance rules only and not about sharing changes of keyring. Reported-by: Mantas M. Signed-off-by: David Howells Tested-by: Ray Strode --- include/linux/cred.h | 17 +----- kernel/cred.c | 127 +++++-------------------------------------- security/keys/keyctl.c | 11 ++-- security/keys/process_keys.c | 66 ++++++++-------------- security/keys/request_key.c | 10 ++-- 5 files changed, 50 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce6637..0142aacb70b7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); -/* - * The common credentials for a thread group - * - shared by CLONE_THREAD - */ -#ifdef CONFIG_KEYS -struct thread_group_cred { - atomic_t usage; - pid_t tgid; /* thread group process ID */ - spinlock_t lock; - struct key __rcu *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ - struct rcu_head rcu; /* RCU deletion hook */ -}; -#endif - /* * The security context of a task * @@ -139,6 +124,8 @@ struct cred { #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ struct thread_group_cred *tgcred; /* thread-group shared credentials */ diff --git a/kernel/cred.c b/kernel/cred.c index de728ac50d82..3f7ad1ec2ae4 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,17 +29,6 @@ static struct kmem_cache *cred_jar; -/* - * The common credentials for the initial task's thread group - */ -#ifdef CONFIG_KEYS -static struct thread_group_cred init_tgcred = { - .usage = ATOMIC_INIT(2), - .tgid = 0, - .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock), -}; -#endif - /* * The initial credentials for the initial task */ @@ -65,9 +54,6 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, -#ifdef CONFIG_KEYS - .tgcred = &init_tgcred, -#endif }; static inline void set_cred_subscribers(struct cred *cred, int n) @@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n) #endif } -/* - * Dispose of the shared task group credentials - */ -#ifdef CONFIG_KEYS -static void release_tgcred_rcu(struct rcu_head *rcu) -{ - struct thread_group_cred *tgcred = - container_of(rcu, struct thread_group_cred, rcu); - - BUG_ON(atomic_read(&tgcred->usage) != 0); - - key_put(tgcred->session_keyring); - key_put(tgcred->process_keyring); - kfree(tgcred); -} -#endif - -/* - * Release a set of thread group credentials. - */ -static void release_tgcred(struct cred *cred) -{ -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred = cred->tgcred; - - if (atomic_dec_and_test(&tgcred->usage)) - call_rcu(&tgcred->rcu, release_tgcred_rcu); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu) #endif security_cred_free(cred); + key_put(cred->session_keyring); + key_put(cred->process_keyring); key_put(cred->thread_keyring); key_put(cred->request_key_auth); - release_tgcred(cred); if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); @@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; -#ifdef CONFIG_KEYS - new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); - if (!new->tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } - atomic_set(&new->tgcred->usage, 1); -#endif - atomic_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; @@ -308,9 +256,10 @@ struct cred *prepare_creds(void) get_user_ns(new->user_ns); #ifdef CONFIG_KEYS + key_get(new->session_keyring); + key_get(new->process_keyring); key_get(new->thread_keyring); key_get(new->request_key_auth); - atomic_inc(&new->tgcred->usage); #endif #ifdef CONFIG_SECURITY @@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds); */ struct cred *prepare_exec_creds(void) { - struct thread_group_cred *tgcred = NULL; struct cred *new; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return NULL; -#endif - new = prepare_creds(); - if (!new) { - kfree(tgcred); + if (!new) return new; - } #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); new->thread_keyring = NULL; - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; #endif return new; @@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void) */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif struct cred *new; int ret; @@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) install_thread_keyring_to_cred(new); } - /* we share the process and session keyrings between all the threads in - * a process - this is slightly icky as we violate COW credentials a - * bit */ + /* The process keyring is only shared between the threads in a process; + * anything outside of those threads doesn't inherit. + */ if (!(clone_flags & CLONE_THREAD)) { - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - ret = -ENOMEM; - goto error_put; - } - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = key_get(new->tgcred->session_keyring); - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; } #endif @@ -643,9 +560,6 @@ void __init cred_init(void) */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif const struct cred *old; struct cred *new; @@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } -#endif - kdebug("prepare_kernel_cred() alloc %p", new); if (daemon) @@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) get_group_info(new->group_info); #ifdef CONFIG_KEYS - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = NULL; - new->tgcred = tgcred; - new->request_key_auth = NULL; + new->session_keyring = NULL; + new->process_keyring = NULL; new->thread_keyring = NULL; + new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a0d373f76815..65b38417c211 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1475,7 +1475,8 @@ long keyctl_session_to_parent(void) goto error_keyring; newwork = &cred->rcu; - cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); + cred->session_keyring = key_ref_to_ptr(keyring_r); + keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; @@ -1500,7 +1501,7 @@ long keyctl_session_to_parent(void) mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || - mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) { + mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } @@ -1516,9 +1517,9 @@ long keyctl_session_to_parent(void) goto unlock; /* the keyrings must have the same UID */ - if ((pcred->tgcred->session_keyring && - pcred->tgcred->session_keyring->uid != mycred->euid) || - mycred->tgcred->session_keyring->uid != mycred->euid) + if ((pcred->session_keyring && + pcred->session_keyring->uid != mycred->euid) || + mycred->session_keyring->uid != mycred->euid) goto unlock; /* cancel an already pending keyring replacement */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 178b8c3b130a..9de5dc598276 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -169,9 +169,8 @@ static int install_thread_keyring(void) int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; - int ret; - if (new->tgcred->process_keyring) + if (new->process_keyring) return -EEXIST; keyring = keyring_alloc("_pid", new->uid, new->gid, @@ -179,17 +178,8 @@ int install_process_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); - spin_lock_irq(&new->tgcred->lock); - if (!new->tgcred->process_keyring) { - new->tgcred->process_keyring = keyring; - keyring = NULL; - ret = 0; - } else { - ret = -EEXIST; - } - spin_unlock_irq(&new->tgcred->lock); - key_put(keyring); - return ret; + new->process_keyring = keyring; + return 0; } /* @@ -230,7 +220,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* create an empty session keyring */ if (!keyring) { flags = KEY_ALLOC_QUOTA_OVERRUN; - if (cred->tgcred->session_keyring) + if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; keyring = keyring_alloc("_ses", cred->uid, cred->gid, @@ -242,17 +232,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* install the keyring */ - spin_lock_irq(&cred->tgcred->lock); - old = cred->tgcred->session_keyring; - rcu_assign_pointer(cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&cred->tgcred->lock); - - /* we're using RCU on the pointer, but there's no point synchronising - * on it if it didn't previously point to anything */ - if (old) { - synchronize_rcu(); + old = cred->session_keyring; + rcu_assign_pointer(cred->session_keyring, keyring); + + if (old) key_put(old); - } return 0; } @@ -367,9 +351,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->tgcred->process_keyring) { + if (cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->tgcred->process_keyring, 1), + make_key_ref(cred->process_keyring, 1), cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -388,12 +372,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->tgcred->session_keyring) { + if (cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference( - cred->tgcred->session_keyring), - 1), + make_key_ref(rcu_dereference(cred->session_keyring), 1), cred, type, description, match, no_state_check); rcu_read_unlock(); @@ -563,7 +545,7 @@ try_again: break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->tgcred->process_keyring) { + if (!cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -575,13 +557,13 @@ try_again: goto reget_creds; } - key = cred->tgcred->process_keyring; + key = cred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->tgcred->session_keyring) { + if (!cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -596,7 +578,7 @@ try_again: if (ret < 0) goto error; goto reget_creds; - } else if (cred->tgcred->session_keyring == + } else if (cred->session_keyring == cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); @@ -606,7 +588,7 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->tgcred->session_keyring); + key = rcu_dereference(cred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -766,12 +748,6 @@ long join_session_keyring(const char *name) struct key *keyring; long ret, serial; - /* only permit this if there's a single thread in the thread group - - * this avoids us having to adjust the creds on all threads and risking - * ENOMEM */ - if (!current_is_single_threaded()) - return -EMLINK; - new = prepare_creds(); if (!new) return -ENOMEM; @@ -783,7 +759,7 @@ long join_session_keyring(const char *name) if (ret < 0) goto error; - serial = new->tgcred->session_keyring->serial; + serial = new->session_keyring->serial; ret = commit_creds(new); if (ret == 0) ret = serial; @@ -806,6 +782,9 @@ long join_session_keyring(const char *name) } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; + } else if (keyring == new->session_keyring) { + ret = 0; + goto error2; } /* we've got a keyring - now to install it */ @@ -862,8 +841,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); - new->tgcred->tgid = old->tgcred->tgid; - new->tgcred->process_keyring = key_get(old->tgcred->process_keyring); + new->process_keyring = key_get(old->process_keyring); security_transfer_creds(new, old); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 000e75017520..275c4f9e4b8c 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -150,12 +150,12 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; - if (cred->tgcred->process_keyring) - prkey = cred->tgcred->process_keyring->serial; + if (cred->process_keyring) + prkey = cred->process_keyring->serial; sprintf(keyring_str[1], "%d", prkey); rcu_read_lock(); - session = rcu_dereference(cred->tgcred->session_keyring); + session = rcu_dereference(cred->session_keyring); if (!session) session = cred->user->session_keyring; sskey = session->serial; @@ -297,14 +297,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(cred->tgcred->process_keyring); + dest_keyring = key_get(cred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(cred->tgcred->session_keyring)); + rcu_dereference(cred->session_keyring)); rcu_read_unlock(); if (dest_keyring) -- cgit v1.2.3 From 96b5c8fea6c0861621051290d705ec2e971963f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:56 +0100 Subject: KEYS: Reduce initial permissions on keys Reduce the initial permissions on new keys to grant the possessor everything, view permission only to the user (so the keys can be seen in /proc/keys) and nothing else. This gives the creator a chance to adjust the permissions mask before other processes can access the new key or create a link to it. To aid with this, keyring_alloc() now takes a permission argument rather than setting the permissions itself. The following permissions are now set: (1) The user and user-session keyrings grant the user that owns them full permissions and grant a possessor everything bar SETATTR. (2) The process and thread keyrings grant the possessor full permissions but only grant the user VIEW. This permits the user to see them in /proc/keys, but not to do anything with them. (3) Anonymous session keyrings grant the possessor full permissions, but only grant the user VIEW and READ. This means that the user can see them in /proc/keys and can list them, but nothing else. Possibly READ shouldn't be provided either. (4) Named session keyrings grant everything an anonymous session keyring does, plus they grant the user LINK permission. The whole point of named session keyrings is that others can also subscribe to them. Possibly this should be a separate permission to LINK. (5) The temporary session keyring created by call_sbin_request_key() gets the same permissions as an anonymous session keyring. (6) Keys created by add_key() get VIEW, SEARCH, LINK and SETATTR for the possessor, plus READ and/or WRITE if the key type supports them. The used only gets VIEW now. (7) Keys created by request_key() now get the same as those created by add_key(). Reported-by: Lennart Poettering Reported-by: Stef Walter Signed-off-by: David Howells --- include/linux/key.h | 1 + security/keys/key.c | 6 +++--- security/keys/keyring.c | 9 +++------ security/keys/process_keys.c | 26 +++++++++++++++++--------- security/keys/request_key.c | 11 ++++++++++- 5 files changed, 34 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index cef3b315ba7c..890699815212 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -264,6 +264,7 @@ extern int key_unlink(struct key *keyring, extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, const struct cred *cred, + key_perm_t perm, unsigned long flags, struct key *dest); diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..bebeca3a78e4 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -826,13 +826,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, /* if the client doesn't provide, decide on the permissions we want */ if (perm == KEY_PERM_UNDEF) { perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + perm |= KEY_USR_VIEW; if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + perm |= KEY_POS_READ; if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + perm |= KEY_POS_WRITE; } /* allocate a new key */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..cf704a92083f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -257,17 +257,14 @@ error: * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - const struct cred *cred, unsigned long flags, - struct key *dest) + const struct cred *cred, key_perm_t perm, + unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - flags); - + uid, gid, cred, perm, flags); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 9de5dc598276..b58d93892740 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -46,9 +46,11 @@ int install_user_keyrings(void) struct user_struct *user; const struct cred *cred; struct key *uid_keyring, *session_keyring; + key_perm_t user_keyring_perm; char buf[20]; int ret; + user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; cred = current_cred(); user = cred->user; @@ -72,8 +74,8 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, - NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -88,7 +90,8 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -129,6 +132,7 @@ int install_thread_keyring_to_cred(struct cred *new) struct key *keyring; keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -173,8 +177,9 @@ int install_process_keyring_to_cred(struct cred *new) if (new->process_keyring) return -EEXIST; - keyring = keyring_alloc("_pid", new->uid, new->gid, - new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + keyring = keyring_alloc("_pid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, + KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -223,8 +228,9 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc("_ses", cred->uid, cred->gid, - cred, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, + flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { @@ -773,8 +779,10 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, old->uid, old->gid, old, - KEY_ALLOC_IN_QUOTA, NULL); + keyring = keyring_alloc( + name, old->uid, old->gid, old, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 275c4f9e4b8c..0ae3a2202771 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons, cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_QUOTA_OVERRUN, NULL); put_cred(cred); if (IS_ERR(keyring)) { @@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type, const struct cred *cred = current_cred(); unsigned long prealloc; struct key *key; + key_perm_t perm; key_ref_t key_ref; int ret; @@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type, *_key = NULL; mutex_lock(&user->cons_lock); + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; + if (type->read) + perm |= KEY_POS_READ; + if (type == &key_type_keyring || type->update) + perm |= KEY_POS_WRITE; + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, - KEY_POS_ALL, flags); + perm, flags); if (IS_ERR(key)) goto alloc_failed; -- cgit v1.2.3 From 385ddeac7ed99cf7dc62d76274d55fbd7cae1b5a Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Fri, 5 Oct 2012 15:05:34 -0700 Subject: X86 ACPI: Use #ifdef not #if for CONFIG_X86 check Fix a build warning on ia64: include/linux/acpi.h:437:5: warning: "CONFIG_X86" is not defined Signed-off-by: Tony Luck Link: http://lkml.kernel.org/r/506f59ae9600b36a4@agluck-desktop.sc.intel.com Cc: Len Brown Cc: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d14081c10c17..49fe586e3b1e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -434,7 +434,7 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -#if CONFIG_X86 +#ifdef CONFIG_X86 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, -- cgit v1.2.3 From 0384e90b853357d24935c65ba0e1bdd27faa6e58 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 7 Oct 2012 18:19:44 +0200 Subject: spi/mcspi: allow configuration of pin directions Allow D0 to be an input and D1 to be an output, configurable via platform data and a new DT property. Based on a patch from Matus Ujhelyi Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/omap-spi.txt | 4 +++- drivers/spi/spi-omap2-mcspi.c | 25 ++++++++++++++++------ include/linux/platform_data/spi-omap2-mcspi.h | 4 ++++ 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt index 81df374adbb9..2ef0a6b85653 100644 --- a/Documentation/devicetree/bindings/spi/omap-spi.txt +++ b/Documentation/devicetree/bindings/spi/omap-spi.txt @@ -6,7 +6,9 @@ Required properties: - "ti,omap4-spi" for OMAP4+. - ti,spi-num-cs : Number of chipselect supported by the instance. - ti,hwmods: Name of the hwmod associated to the McSPI - +- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as + output. The default is D0 as output and + D1 as input. Example: diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 3542fdc664b1..51046332677c 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -130,6 +130,7 @@ struct omap2_mcspi { struct omap2_mcspi_dma *dma_channels; struct device *dev; struct omap2_mcspi_regs ctx; + unsigned int pin_dir:1; }; struct omap2_mcspi_cs { @@ -765,8 +766,15 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, /* standard 4-wire master mode: SCK, MOSI/out, MISO/in, nCS * REVISIT: this controller could support SPI_3WIRE mode. */ - l &= ~(OMAP2_MCSPI_CHCONF_IS|OMAP2_MCSPI_CHCONF_DPE1); - l |= OMAP2_MCSPI_CHCONF_DPE0; + if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) { + l &= ~OMAP2_MCSPI_CHCONF_IS; + l &= ~OMAP2_MCSPI_CHCONF_DPE1; + l |= OMAP2_MCSPI_CHCONF_DPE0; + } else { + l |= OMAP2_MCSPI_CHCONF_IS; + l |= OMAP2_MCSPI_CHCONF_DPE1; + l &= ~OMAP2_MCSPI_CHCONF_DPE0; + } /* wordlength */ l &= ~OMAP2_MCSPI_CHCONF_WL_MASK; @@ -1167,6 +1175,11 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev) master->cleanup = omap2_mcspi_cleanup; master->dev.of_node = node; + dev_set_drvdata(&pdev->dev, master); + + mcspi = spi_master_get_devdata(master); + mcspi->master = master; + match = of_match_device(omap_mcspi_of_match, &pdev->dev); if (match) { u32 num_cs = 1; /* default number of chipselect */ @@ -1175,19 +1188,17 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev) of_property_read_u32(node, "ti,spi-num-cs", &num_cs); master->num_chipselect = num_cs; master->bus_num = bus_num++; + if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL)) + mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT; } else { pdata = pdev->dev.platform_data; master->num_chipselect = pdata->num_cs; if (pdev->id != -1) master->bus_num = pdev->id; + mcspi->pin_dir = pdata->pin_dir; } regs_offset = pdata->regs_offset; - dev_set_drvdata(&pdev->dev, master); - - mcspi = spi_master_get_devdata(master); - mcspi->master = master; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (r == NULL) { status = -ENODEV; diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index a357eb26bd25..ce70f7b5a8e1 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -7,9 +7,13 @@ #define OMAP4_MCSPI_REG_OFFSET 0x100 +#define MCSPI_PINDIR_D0_OUT_D1_IN 0 +#define MCSPI_PINDIR_D0_IN_D1_OUT 1 + struct omap2_mcspi_platform_config { unsigned short num_cs; unsigned int regs_offset; + unsigned int pin_dir:1; }; struct omap2_mcspi_dev_attr { -- cgit v1.2.3 From b2409b65d9401f7e3c572fc59db6a920fc7a163e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 19 Oct 2012 07:29:17 -0300 Subject: [media] remove include/linux/dvb/dmx.h The only reason for this header is to make sure that include linux/time.h were added before uapi/*/dmx.h. Just push down the time.h header on the few places where this is used, and drop this new header. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dmxdev.h | 1 + drivers/media/firewire/firedtv.h | 1 + drivers/media/pci/ttpci/av7110.h | 1 + drivers/media/usb/tlg2300/pd-dvb.c | 1 + include/linux/dvb/dmx.h | 29 ----------------------------- 5 files changed, 4 insertions(+), 29 deletions(-) delete mode 100644 include/linux/dvb/dmx.h (limited to 'include/linux') diff --git a/drivers/media/dvb-core/dmxdev.h b/drivers/media/dvb-core/dmxdev.h index 02ebe28f830d..48c6cf92ab99 100644 --- a/drivers/media/dvb-core/dmxdev.h +++ b/drivers/media/dvb-core/dmxdev.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h index 4fdcd8cb7530..c2ba085e0d20 100644 --- a/drivers/media/firewire/firedtv.h +++ b/drivers/media/firewire/firedtv.h @@ -13,6 +13,7 @@ #ifndef _FIREDTV_H #define _FIREDTV_H +#include #include #include #include diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/media/pci/ttpci/av7110.h index 88b3b2d6cc0e..a378662b1dcf 100644 --- a/drivers/media/pci/ttpci/av7110.h +++ b/drivers/media/pci/ttpci/av7110.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/media/usb/tlg2300/pd-dvb.c index 30fcb117e898..ca4994a5190c 100644 --- a/drivers/media/usb/tlg2300/pd-dvb.c +++ b/drivers/media/usb/tlg2300/pd-dvb.c @@ -1,6 +1,7 @@ #include "pd-common.h" #include #include +#include #include #include #include diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h deleted file mode 100644 index 0be6d8f2b52b..000000000000 --- a/include/linux/dvb/dmx.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * dmx.h - * - * Copyright (C) 2000 Marcus Metzler - * & Ralph Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ -#ifndef _DVBDMX_H_ -#define _DVBDMX_H_ - -#include -#include - -#endif /*_DVBDMX_H_*/ -- cgit v1.2.3 From 74df06daf632ce2d321d01cb046004768352efc4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 19 Oct 2012 07:41:50 -0300 Subject: [media] Remove include/linux/dvb/ stuff The only file left there is include/linux/dvb/video.h. The only function for it is to include linux/compiler.h, but this is already indirectly included. So, get rid of it. Signed-off-by: Mauro Carvalho Chehab --- include/linux/dvb/Kbuild | 0 include/linux/dvb/video.h | 29 ----------------------------- 2 files changed, 29 deletions(-) delete mode 100644 include/linux/dvb/Kbuild delete mode 100644 include/linux/dvb/video.h (limited to 'include/linux') diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h deleted file mode 100644 index 85c20d925696..000000000000 --- a/include/linux/dvb/video.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * video.h - * - * Copyright (C) 2000 Marcus Metzler - * & Ralph Metzler - * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ -#ifndef _DVBVIDEO_H_ -#define _DVBVIDEO_H_ - -#include -#include - -#endif /*_DVBVIDEO_H_*/ -- cgit v1.2.3 From 7e05484f02e1ea05a3aae0724d4df1e8a5a1920f Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: Tue, 16 Oct 2012 23:55:54 +1030 Subject: virtio-scsi: Add real 2-clause BSD license to header This is analogous to commit a1b383870a made by Rusty Russell to all the VirtIO headers at the time. This eases the use of the header as is by other OSes. Signed-off-by: Bryan Venteicher Acked-by: Paolo Bonzini Signed-off-by: Rusty Russell --- include/linux/virtio_scsi.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h index d6b4440387b7..4195b97a3def 100644 --- a/include/linux/virtio_scsi.h +++ b/include/linux/virtio_scsi.h @@ -1,7 +1,31 @@ +/* + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #ifndef _LINUX_VIRTIO_SCSI_H #define _LINUX_VIRTIO_SCSI_H -/* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ #define VIRTIO_SCSI_CDB_SIZE 32 #define VIRTIO_SCSI_SENSE_SIZE 96 -- cgit v1.2.3 From 1b4f59e356cc94929305bd107b7f38eec62715ad Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 22 Oct 2012 18:05:36 +0400 Subject: slub: Commonize slab_cache field in struct page Right now, slab and slub have fields in struct page to derive which cache a page belongs to, but they do it slightly differently. slab uses a field called slab_cache, that lives in the third double word. slub, uses a field called "slab", living outside of the doublewords area. Ideally, we could use the same field for this. Since slub heavily makes use of the doubleword region, there isn't really much room to move slub's slab_cache field around. Since slab does not have such strict placement restrictions, we can move it outside the doubleword area. The naming used by slab, "slab_cache", is less confusing, and it is preferred over slub's generic "slab". Signed-off-by: Glauber Costa Acked-by: Christoph Lameter CC: David Rientjes Signed-off-by: Pekka Enberg --- include/linux/mm_types.h | 7 ++----- mm/slub.c | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 31f8a3af7d94..2fef4e720e79 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -128,10 +128,7 @@ struct page { }; struct list_head list; /* slobs list of pages */ - struct { /* slab fields */ - struct kmem_cache *slab_cache; - struct slab *slab_page; - }; + struct slab *slab_page; /* slab fields */ }; /* Remainder is not double word aligned */ @@ -146,7 +143,7 @@ struct page { #if USE_SPLIT_PTLOCKS spinlock_t ptl; #endif - struct kmem_cache *slab; /* SLUB: Pointer to slab */ + struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ }; diff --git a/mm/slub.c b/mm/slub.c index 16274b273c61..35483e0ab6bc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1092,11 +1092,11 @@ static noinline struct kmem_cache_node *free_debug_processing( if (!check_object(s, page, object, SLUB_RED_ACTIVE)) goto out; - if (unlikely(s != page->slab)) { + if (unlikely(s != page->slab_cache)) { if (!PageSlab(page)) { slab_err(s, page, "Attempt to free object(0x%p) " "outside of slab", object); - } else if (!page->slab) { + } else if (!page->slab_cache) { printk(KERN_ERR "SLUB : no slab for object 0x%p.\n", object); @@ -1357,7 +1357,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) goto out; inc_slabs_node(s, page_to_nid(page), page->objects); - page->slab = s; + page->slab_cache = s; __SetPageSlab(page); if (page->pfmemalloc) SetPageSlabPfmemalloc(page); @@ -1424,7 +1424,7 @@ static void rcu_free_slab(struct rcu_head *h) else page = container_of((struct list_head *)h, struct page, lru); - __free_slab(page->slab, page); + __free_slab(page->slab_cache, page); } static void free_slab(struct kmem_cache *s, struct page *page) @@ -2617,9 +2617,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x) page = virt_to_head_page(x); - if (kmem_cache_debug(s) && page->slab != s) { + if (kmem_cache_debug(s) && page->slab_cache != s) { pr_err("kmem_cache_free: Wrong slab cache. %s but object" - " is from %s\n", page->slab->name, s->name); + " is from %s\n", page->slab_cache->name, s->name); WARN_ON_ONCE(1); return; } @@ -3418,7 +3418,7 @@ size_t ksize(const void *object) return PAGE_SIZE << compound_order(page); } - return slab_ksize(page->slab); + return slab_ksize(page->slab_cache); } EXPORT_SYMBOL(ksize); @@ -3443,8 +3443,8 @@ bool verify_mem_not_deleted(const void *x) } slab_lock(page); - if (on_freelist(page->slab, page, object)) { - object_err(page->slab, page, object, "Object is on free-list"); + if (on_freelist(page->slab_cache, page, object)) { + object_err(page->slab_cache, page, object, "Object is on free-list"); rv = false; } else { rv = true; @@ -3475,7 +3475,7 @@ void kfree(const void *x) __free_pages(page, compound_order(page)); return; } - slab_free(page->slab, page, object, _RET_IP_); + slab_free(page->slab_cache, page, object, _RET_IP_); } EXPORT_SYMBOL(kfree); @@ -3686,11 +3686,11 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) if (n) { list_for_each_entry(p, &n->partial, lru) - p->slab = s; + p->slab_cache = s; #ifdef CONFIG_SLUB_DEBUG list_for_each_entry(p, &n->full, lru) - p->slab = s; + p->slab_cache = s; #endif } } -- cgit v1.2.3 From 6c9c6d6301287e369a754d628230fa6e50cdb74b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Oct 2012 11:08:06 -0600 Subject: dma-debug: New interfaces to debug dma mapping errors Add dma-debug interface debug_dma_mapping_error() to debug drivers that fail to check dma mapping errors on addresses returned by dma_map_single() and dma_map_page() interfaces. This interface clears a flag set by debug_dma_map_page() to indicate that dma_mapping_error() has been called by the driver. When driver does unmap, debug_dma_unmap() checks the flag and if this flag is still set, prints warning message that includes call trace that leads up to the unmap. This interface can be called from dma_mapping_error() routines to enable dma mapping error check debugging. Tested: Intel iommu and swiotlb (iommu=soft) on x86-64 with CONFIG_DMA_API_DEBUG enabled and disabled. Signed-off-by: Shuah Khan Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Joerg Roedel --- Documentation/DMA-API.txt | 12 +++++++ arch/x86/include/asm/dma-mapping.h | 1 + include/linux/dma-debug.h | 7 ++++ lib/dma-debug.c | 71 +++++++++++++++++++++++++++++++++++--- 4 files changed, 87 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 66bd97a95f10..78a6c569d204 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number of preallocated entries is defined per architecture. If it is too low for you boot with 'dma_debug_entries=' to overwrite the architectural default. + +void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr); + +dma-debug interface debug_dma_mapping_error() to debug drivers that fail +to check dma mapping errors on addresses returned by dma_map_single() and +dma_map_page() interfaces. This interface clears a flag set by +debug_dma_map_page() to indicate that dma_mapping_error() has been called by +the driver. When driver does unmap, debug_dma_unmap() checks the flag and if +this flag is still set, prints warning message that includes call trace that +leads up to the unmap. This interface can be called from dma_mapping_error() +routines to enable dma mapping error check debugging. + diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f7b4c7903e7e..808dae63eeea 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 171ad8aedc83..fc0e34ce038f 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page, int direction, dma_addr_t dma_addr, bool map_single); +extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single); @@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page, { } +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ +} + static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d84beb994f36..59f4a1a8187d 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -45,6 +45,12 @@ enum { dma_debug_coherent, }; +enum map_err_types { + MAP_ERR_CHECK_NOT_APPLICABLE, + MAP_ERR_NOT_CHECKED, + MAP_ERR_CHECKED, +}; + #define DMA_DEBUG_STACKTRACE_ENTRIES 5 struct dma_debug_entry { @@ -57,6 +63,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; + enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE struct stack_trace stacktrace; unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; @@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly; static DEFINE_RWLOCK(driver_name_lock); +static const char *const maperr2str[] = { + [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", + [MAP_ERR_NOT_CHECKED] = "dma map error not checked", + [MAP_ERR_CHECKED] = "dma map error checked", +}; + static const char *type2name[4] = { "single", "page", "scather-gather", "coherent" }; @@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, (unsigned long long)entry->paddr, entry->dev_addr, entry->size, - dir2name[entry->direction]); + dir2name[entry->direction], + maperr2str[entry->map_err_type]); } } @@ -838,13 +852,28 @@ static __init int dma_debug_entries_cmdline(char *str) __setup("dma_debug=", dma_debug_cmdline); __setup("dma_debug_entries=", dma_debug_entries_cmdline); +/* Calling dma_mapping_error() from dma-debug api will result in calling + debug_dma_mapping_error() - need internal mapping error routine to + avoid debug checks */ +#ifndef DMA_ERROR_CODE +#define DMA_ERROR_CODE 0 +#endif +static inline int has_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); + + return (dma_addr == DMA_ERROR_CODE); +} + static void check_unmap(struct dma_debug_entry *ref) { struct dma_debug_entry *entry; struct hash_bucket *bucket; unsigned long flags; - if (dma_mapping_error(ref->dev, ref->dev_addr)) { + if (unlikely(has_mapping_error(ref->dev, ref->dev_addr))) { err_printk(ref->dev, NULL, "DMA-API: device driver tries " "to free an invalid DMA memory address\n"); return; @@ -910,6 +939,15 @@ static void check_unmap(struct dma_debug_entry *ref) dir2name[ref->direction]); } + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + err_printk(ref->dev, entry, + "DMA-API: device driver failed to check map error" + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s]", + ref->dev_addr, ref->size, + type2name[entry->type]); + } + hash_bucket_del(entry); dma_entry_free(entry); @@ -1017,7 +1055,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, if (unlikely(global_disable)) return; - if (unlikely(dma_mapping_error(dev, dma_addr))) + if (unlikely(has_mapping_error(dev, dma_addr))) return; entry = dma_entry_alloc(); @@ -1030,6 +1068,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; + entry->map_err_type = MAP_ERR_NOT_CHECKED; if (map_single) entry->type = dma_debug_single; @@ -1045,6 +1084,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, } EXPORT_SYMBOL(debug_dma_map_page); +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_debug_entry ref; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (unlikely(global_disable)) + return; + + ref.dev = dev; + ref.dev_addr = dma_addr; + bucket = get_hash_bucket(&ref, &flags); + entry = bucket_find_exact(bucket, &ref); + + if (!entry) + goto out; + + entry->map_err_type = MAP_ERR_CHECKED; +out: + put_hash_bucket(bucket, &flags); +} +EXPORT_SYMBOL(debug_dma_mapping_error); + void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) { -- cgit v1.2.3 From 91872392f08486f692887d2f06a333f512648f22 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 9 Oct 2012 19:52:05 +0200 Subject: drivers/base: Add a DEVICE_BOOL_ATTR macro ... which, analogous to DEVICE_INT_ATTR provides functionality to set/clear bools. Its purpose is to be used where values need to be used as booleans in configuration context. Next patch uses this. Signed-off-by: Borislav Petkov Acked-by: Greg Kroah-Hartman Acked-by: Tony Luck --- drivers/base/core.c | 21 +++++++++++++++++++++ include/linux/device.h | 7 +++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index abea76c36a4b..c8ae1f6b01b9 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -171,6 +171,27 @@ ssize_t device_show_int(struct device *dev, } EXPORT_SYMBOL_GPL(device_show_int); +ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct dev_ext_attribute *ea = to_ext_attr(attr); + + if (strtobool(buf, ea->var) < 0) + return -EINVAL; + + return size; +} +EXPORT_SYMBOL_GPL(device_store_bool); + +ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dev_ext_attribute *ea = to_ext_attr(attr); + + return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var)); +} +EXPORT_SYMBOL_GPL(device_show_bool); + /** * device_release - free device structure. * @kobj: device's kobject. diff --git a/include/linux/device.h b/include/linux/device.h index 86ef6ab553b1..50c85a26b003 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -496,6 +496,10 @@ ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); +ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) @@ -505,6 +509,9 @@ ssize_t device_store_int(struct device *dev, struct device_attribute *attr, #define DEVICE_INT_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_int, device_store_int), &(_var) } +#define DEVICE_BOOL_ATTR(_name, _mode, _var) \ + struct dev_ext_attribute dev_attr_##_name = \ + { __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) } #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) -- cgit v1.2.3 From 02b91b55260f7a1bdc8da25866cf27f726f5788f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 28 Jun 2012 18:26:52 +0200 Subject: drbd: introduce stop-sector to online verify We now can schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 17 +++++++++++++---- drivers/block/drbd/drbd_nl.c | 11 +++++++---- drivers/block/drbd/drbd_proc.c | 12 +++++++++--- drivers/block/drbd/drbd_receiver.c | 8 ++++++++ drivers/block/drbd/drbd_worker.c | 33 +++++++++++++++++++++++++++------ include/linux/drbd.h | 2 +- include/linux/drbd_nl.h | 1 + 8 files changed, 67 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9a6d3a4a739a..3cce7357402b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1052,6 +1052,7 @@ struct drbd_conf { /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; + sector_t ov_stop_sector; /* where are we now? (sector) */ sector_t ov_position; /* Start sector of out of sync range (to merge printk reporting). */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dfa08b7411c0..df9965d820c9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1231,13 +1231,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); - /* aborted verify run. log the last position */ + /* Aborted verify run, or we reached the stop sector. + * Log the last position, unless end-of-device. */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { + ns.conn <= C_CONNECTED) { mdev->ov_start_sector = BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + if (mdev->ov_left) + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && @@ -1703,6 +1705,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Verify finished, or reached stop sector. Peer did not know about + * the stop sector, and we may even have changed the stop sector during + * verify to interrupt/stop early. Send the new state. */ + if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED + && mdev->agreed_pro_version >= 97) + drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { clear_bit(STATE_SENT, &mdev->flags); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ab660556a001..e2d368f1747e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2211,8 +2211,10 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct start_ov args = { + .start_sector = mdev->ov_start_sector, + .stop_sector = ULLONG_MAX, + }; if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; @@ -2224,8 +2226,9 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; + /* w_make_ov_request expects start position to be aligned */ + mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1); + mdev->ov_stop_sector = args.stop_sector; reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); drbd_resume_io(mdev); return 0; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 5496104f90b9..a5a453b4355f 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(mdev); unsigned long bit_pos; + unsigned long long stop_sector = 0; if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) + mdev->state.conn == C_VERIFY_T) { bit_pos = bm_bits - mdev->ov_left; - else + if (mdev->agreed_pro_version >= 97) + stop_sector = mdev->ov_stop_sector; + } else bit_pos = mdev->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. */ seq_printf(seq, - "\t%3d%% sector pos: %llu/%llu\n", + "\t%3d%% sector pos: %llu/%llu", (int)(bit_pos / (bm_bits/100+1)), (unsigned long long)bit_pos * BM_SECT_PER_BIT, (unsigned long long)bm_bits * BM_SECT_PER_BIT); + if (stop_sector != 0 && stop_sector != ULLONG_MAX) + seq_printf(seq, " stop sector: %llu", stop_sector); + seq_printf(seq, "\n"); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 434adf75259a..280735da1963 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3255,6 +3255,14 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } + /* explicit verify finished notification, stop sector reached. */ + if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && + peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { + ov_oos_print(mdev); + drbd_resync_finished(mdev); + return true; + } + /* peer says his disk is inconsistent, while we think it is uptodate, * and this happens while the peer still thinks we have a sync going on, * but we think we are already done with the sync. diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6bce2cc179d4..1352455dd7dd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -691,6 +691,7 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + bool stop_sector_reached = false; if (unlikely(cancel)) return 1; @@ -699,9 +700,17 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca sector = mdev->ov_position; for (i = 0; i < number; i++) { - if (sector >= capacity) { + if (sector >= capacity) return 1; - } + + /* We check for "finished" only in the reply path: + * w_e_end_ov_reply(). + * We need to send at least one request out. */ + stop_sector_reached = i > 0 + && mdev->agreed_pro_version >= 97 + && sector >= mdev->ov_stop_sector; + if (stop_sector_reached) + break; size = BM_BLOCK_SIZE; @@ -725,7 +734,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + if (i == 0 || !stop_sector_reached) + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } @@ -808,7 +818,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; if (dt <= 0) dt = 1; + db = mdev->rs_total; + /* adjust for verify start and stop sectors, respective reached position */ + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + db -= mdev->ov_left; + dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; @@ -831,7 +846,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - verify_done ? "Online verify " : "Resync", + verify_done ? "Online verify" : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -912,7 +927,9 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - if (verify_done) + + /* reset start sector, if we reached end of device */ + if (verify_done && mdev->ov_left == 0) mdev->ov_start_sector = 0; drbd_md_sync(mdev); @@ -1158,6 +1175,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) unsigned int size = e->size; int digest_size; int ok, eq = 0; + bool stop_sector_reached = false; if (unlikely(cancel)) { drbd_free_ee(mdev, e); @@ -1208,7 +1226,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if ((mdev->ov_left & 0x200) == 0x200) drbd_advance_rs_marks(mdev, mdev->ov_left); - if (mdev->ov_left == 0) { + stop_sector_reached = mdev->agreed_pro_version >= 97 && + (sector + (size>>9)) >= mdev->ov_stop_sector; + + if (mdev->ov_left == 0 || stop_sector_reached) { ov_oos_print(mdev); drbd_resync_finished(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 47e3d4850584..4a7eccbd1292 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.13" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 97 enum drbd_io_error_p { diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index a8706f08ab36..f6a576df19e0 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -145,6 +145,7 @@ NL_PACKET(dump_ee, 24, NL_PACKET(start_ov, 25, NL_INT64( 66, T_MAY_IGNORE, start_sector) + NL_INT64( 90, T_MANDATORY, stop_sector) ) NL_PACKET(new_c_uuid, 26, -- cgit v1.2.3 From ccae7868b0c5697508a541c531cf96b361d62c1c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:07:04 +0200 Subject: drbd: log request sector offset and size for IO errors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 19 ++++++++++++++++++- include/linux/drbd.h | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d9e5962a9a8c..135ea76ed502 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -387,6 +387,20 @@ out_conflict: return 1; } +static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req) +{ + char b[BDEVNAME_SIZE]; + + if (__ratelimit(&drbd_ratelimit_state)) + return; + + dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n", + (req->rq_state & RQ_WRITE) ? "WRITE" : "READ", + (unsigned long long)req->sector, + req->size >> 9, + bdevname(mdev->ldev->backing_bdev, b)); +} + /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -455,6 +469,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR); _req_may_be_done_not_susp(req, m); break; @@ -477,6 +492,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; } + drbd_report_io_error(mdev, req); __drbd_chk_io_error(mdev, DRBD_READ_ERROR); goto_queue_for_net_read: @@ -900,7 +916,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns if (!(local || remote) && !is_susp(mdev->state)) { if (__ratelimit(&drbd_ratelimit_state)) - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n", + (unsigned long long)req->sector, req->size >> 9); goto fail_free_complete; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 4a7eccbd1292..94f58a102bbb 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.13" +#define REL_VERSION "8.3.14" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 97 -- cgit v1.2.3 From 5d9db883761ad1bc2245fd3018715549b974203d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 5 Oct 2012 13:54:56 +0800 Subject: efi: Add support for a UEFI variable filesystem The existing EFI variables code only supports variables of up to 1024 bytes. This limitation existed in version 0.99 of the EFI specification, but was removed before any full releases. Since variables can now be larger than a single page, sysfs isn't the best interface for this. So, instead, let's add a filesystem. Variables can be read, written and created, with the first 4 bytes of each variable representing its UEFI attributes. The create() method doesn't actually commit to flash since zero-length variables can't exist per-spec. Updates from Jeremy Kerr . Signed-off-by: Matthew Garrett Signed-off-by: Jeremy Kerr Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 384 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/efi.h | 5 + 2 files changed, 383 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index d10c9873dd9a..b605c4849772 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -80,6 +80,10 @@ #include #include +#include +#include +#include + #include #define EFIVARS_VERSION "0.08" @@ -91,6 +95,7 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(EFIVARS_VERSION); #define DUMP_NAME_LEN 52 +#define GUID_LEN 37 /* * The maximum size of VariableName + Data = 1024 @@ -108,7 +113,6 @@ struct efi_variable { __u32 Attributes; } __attribute__((packed)); - struct efivar_entry { struct efivars *efivars; struct efi_variable var; @@ -122,6 +126,9 @@ struct efivar_attribute { ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count); }; +static struct efivars __efivars; +static struct efivar_operations ops; + #define PSTORE_EFI_ATTRIBUTES \ (EFI_VARIABLE_NON_VOLATILE | \ EFI_VARIABLE_BOOTSERVICE_ACCESS | \ @@ -629,14 +636,380 @@ static struct kobj_type efivar_ktype = { .default_attrs = def_attrs, }; -static struct pstore_info efi_pstore_info; - static inline void efivar_unregister(struct efivar_entry *var) { kobject_put(&var->kobj); } +static int efivarfs_file_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t efivarfs_file_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *ppos) +{ + struct efivar_entry *var = file->private_data; + struct efivars *efivars; + efi_status_t status; + void *data; + u32 attributes; + struct inode *inode = file->f_mapping->host; + int datasize = count - sizeof(attributes); + + if (count < sizeof(attributes)) + return -EINVAL; + + data = kmalloc(datasize, GFP_KERNEL); + + if (!data) + return -ENOMEM; + + efivars = var->efivars; + + if (copy_from_user(&attributes, userbuf, sizeof(attributes))) { + count = -EFAULT; + goto out; + } + + if (attributes & ~(EFI_VARIABLE_MASK)) { + count = -EINVAL; + goto out; + } + + if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { + count = -EFAULT; + goto out; + } + + if (validate_var(&var->var, data, datasize) == false) { + count = -EINVAL; + goto out; + } + + status = efivars->ops->set_variable(var->var.VariableName, + &var->var.VendorGuid, + attributes, datasize, + data); + + switch (status) { + case EFI_SUCCESS: + mutex_lock(&inode->i_mutex); + i_size_write(inode, count); + mutex_unlock(&inode->i_mutex); + break; + case EFI_INVALID_PARAMETER: + count = -EINVAL; + break; + case EFI_OUT_OF_RESOURCES: + count = -ENOSPC; + break; + case EFI_DEVICE_ERROR: + count = -EIO; + break; + case EFI_WRITE_PROTECTED: + count = -EROFS; + break; + case EFI_SECURITY_VIOLATION: + count = -EACCES; + break; + case EFI_NOT_FOUND: + count = -ENOENT; + break; + default: + count = -EINVAL; + break; + } +out: + kfree(data); + + return count; +} + +static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct efivar_entry *var = file->private_data; + struct efivars *efivars = var->efivars; + efi_status_t status; + unsigned long datasize = 0; + u32 attributes; + void *data; + ssize_t size; + + status = efivars->ops->get_variable(var->var.VariableName, + &var->var.VendorGuid, + &attributes, &datasize, NULL); + + if (status != EFI_BUFFER_TOO_SMALL) + return 0; + + data = kmalloc(datasize + 4, GFP_KERNEL); + + if (!data) + return 0; + + status = efivars->ops->get_variable(var->var.VariableName, + &var->var.VendorGuid, + &attributes, &datasize, + (data + 4)); + + if (status != EFI_SUCCESS) + return 0; + + memcpy(data, &attributes, 4); + size = simple_read_from_buffer(userbuf, count, ppos, + data, datasize + 4); + kfree(data); + + return size; +} + +static void efivarfs_evict_inode(struct inode *inode) +{ + clear_inode(inode); +} + +static const struct super_operations efivarfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = efivarfs_evict_inode, + .show_options = generic_show_options, +}; + +static struct super_block *efivarfs_sb; + +static const struct inode_operations efivarfs_dir_inode_operations; + +static const struct file_operations efivarfs_file_operations = { + .open = efivarfs_file_open, + .read = efivarfs_file_read, + .write = efivarfs_file_write, + .llseek = no_llseek, +}; + +static struct inode *efivarfs_get_inode(struct super_block *sb, + const struct inode *dir, int mode, dev_t dev) +{ + struct inode *inode = new_inode(sb); + + if (inode) { + inode->i_ino = get_next_ino(); + inode->i_uid = inode->i_gid = 0; + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_fop = &efivarfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &efivarfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(inode); + break; + } + } + return inode; +} + +static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) +{ + guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]); + guid->b[1] = hex_to_bin(str[4]) << 4 | hex_to_bin(str[5]); + guid->b[2] = hex_to_bin(str[2]) << 4 | hex_to_bin(str[3]); + guid->b[3] = hex_to_bin(str[0]) << 4 | hex_to_bin(str[1]); + guid->b[4] = hex_to_bin(str[11]) << 4 | hex_to_bin(str[12]); + guid->b[5] = hex_to_bin(str[9]) << 4 | hex_to_bin(str[10]); + guid->b[6] = hex_to_bin(str[16]) << 4 | hex_to_bin(str[17]); + guid->b[7] = hex_to_bin(str[14]) << 4 | hex_to_bin(str[15]); + guid->b[8] = hex_to_bin(str[19]) << 4 | hex_to_bin(str[20]); + guid->b[9] = hex_to_bin(str[21]) << 4 | hex_to_bin(str[22]); + guid->b[10] = hex_to_bin(str[24]) << 4 | hex_to_bin(str[25]); + guid->b[11] = hex_to_bin(str[26]) << 4 | hex_to_bin(str[27]); + guid->b[12] = hex_to_bin(str[28]) << 4 | hex_to_bin(str[29]); + guid->b[13] = hex_to_bin(str[30]) << 4 | hex_to_bin(str[31]); + guid->b[14] = hex_to_bin(str[32]) << 4 | hex_to_bin(str[33]); + guid->b[15] = hex_to_bin(str[34]) << 4 | hex_to_bin(str[35]); +} + +static int efivarfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) +{ + struct inode *inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); + struct efivars *efivars = &__efivars; + struct efivar_entry *var; + int namelen, i = 0, err = 0; + + if (dentry->d_name.len < 38) + return -EINVAL; + + if (!inode) + return -ENOSPC; + + var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL); + + if (!var) + return -ENOMEM; + + namelen = dentry->d_name.len - GUID_LEN; + + efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1, + &var->var.VendorGuid); + + for (i = 0; i < namelen; i++) + var->var.VariableName[i] = dentry->d_name.name[i]; + + var->var.VariableName[i] = '\0'; + + inode->i_private = var; + var->efivars = efivars; + var->kobj.kset = efivars->kset; + + err = kobject_init_and_add(&var->kobj, &efivar_ktype, NULL, "%s", + dentry->d_name.name); + if (err) + goto out; + + kobject_uevent(&var->kobj, KOBJ_ADD); + spin_lock(&efivars->lock); + list_add(&var->list, &efivars->list); + spin_unlock(&efivars->lock); + d_instantiate(dentry, inode); + dget(dentry); +out: + if (err) + kfree(var); + return err; +} + +static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct efivar_entry *var = dentry->d_inode->i_private; + struct efivars *efivars = var->efivars; + efi_status_t status; + + spin_lock(&efivars->lock); + + status = efivars->ops->set_variable(var->var.VariableName, + &var->var.VendorGuid, + 0, 0, NULL); + + if (status == EFI_SUCCESS || status == EFI_NOT_FOUND) { + list_del(&var->list); + spin_unlock(&efivars->lock); + efivar_unregister(var); + drop_nlink(dir); + dput(dentry); + return 0; + } + + spin_unlock(&efivars->lock); + return -EINVAL; +}; + +int efivarfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode = NULL; + struct dentry *root; + struct efivar_entry *entry, *n; + struct efivars *efivars = &__efivars; + int err; + + efivarfs_sb = sb; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = PSTOREFS_MAGIC; + sb->s_op = &efivarfs_ops; + sb->s_time_gran = 1; + + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); + if (!inode) { + err = -ENOMEM; + goto fail; + } + inode->i_op = &efivarfs_dir_inode_operations; + + root = d_make_root(inode); + sb->s_root = root; + if (!root) { + err = -ENOMEM; + goto fail; + } + + list_for_each_entry_safe(entry, n, &efivars->list, list) { + struct inode *inode; + struct dentry *dentry, *root = efivarfs_sb->s_root; + char *name; + unsigned long size = 0; + int len, i; + + len = utf16_strlen(entry->var.VariableName); + + /* GUID plus trailing NULL */ + name = kmalloc(len + 38, GFP_ATOMIC); + + for (i = 0; i < len; i++) + name[i] = entry->var.VariableName[i] & 0xFF; + + name[len] = '-'; + + efi_guid_unparse(&entry->var.VendorGuid, name + len + 1); + + name[len+GUID_LEN] = '\0'; + + inode = efivarfs_get_inode(efivarfs_sb, root->d_inode, + S_IFREG | 0644, 0); + dentry = d_alloc_name(root, name); + + efivars->ops->get_variable(entry->var.VariableName, + &entry->var.VendorGuid, + &entry->var.Attributes, + &size, + NULL); + + mutex_lock(&inode->i_mutex); + inode->i_private = entry; + i_size_write(inode, size+4); + mutex_unlock(&inode->i_mutex); + d_add(dentry, inode); + } + + return 0; +fail: + iput(inode); + return err; +} + +static struct dentry *efivarfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_single(fs_type, flags, data, efivarfs_fill_super); +} + +static void efivarfs_kill_sb(struct super_block *sb) +{ + kill_litter_super(sb); + efivarfs_sb = NULL; +} + +static struct file_system_type efivarfs_type = { + .name = "efivarfs", + .mount = efivarfs_mount, + .kill_sb = efivarfs_kill_sb, +}; + +static const struct inode_operations efivarfs_dir_inode_operations = { + .lookup = simple_lookup, + .unlink = efivarfs_unlink, + .create = efivarfs_create, +}; + +static struct pstore_info efi_pstore_info; + #ifdef CONFIG_PSTORE static int efi_pstore_open(struct pstore_info *psi) @@ -1198,6 +1571,8 @@ int register_efivars(struct efivars *efivars, pstore_register(&efivars->efi_pstore_info); } + register_filesystem(&efivarfs_type); + out: kfree(variable_name); @@ -1205,9 +1580,6 @@ out: } EXPORT_SYMBOL_GPL(register_efivars); -static struct efivars __efivars; -static struct efivar_operations ops; - /* * For now we register the efi subsystem with the firmware subsystem * and the vars subsystem with the efi subsystem. In the future, it diff --git a/include/linux/efi.h b/include/linux/efi.h index 8670eb1eb8cd..b2af1571592b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -29,7 +29,12 @@ #define EFI_UNSUPPORTED ( 3 | (1UL << (BITS_PER_LONG-1))) #define EFI_BAD_BUFFER_SIZE ( 4 | (1UL << (BITS_PER_LONG-1))) #define EFI_BUFFER_TOO_SMALL ( 5 | (1UL << (BITS_PER_LONG-1))) +#define EFI_NOT_READY ( 6 | (1UL << (BITS_PER_LONG-1))) +#define EFI_DEVICE_ERROR ( 7 | (1UL << (BITS_PER_LONG-1))) +#define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) +#define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) typedef unsigned long efi_status_t; typedef u8 efi_bool_t; -- cgit v1.2.3 From 605e70c7aa1b7b0d554baf945630c1d606bbfbc3 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Fri, 5 Oct 2012 13:54:56 +0800 Subject: efi: add efivars kobject to efi sysfs folder UEFI variable filesystem need a new mount point, so this patch add efivars kobject to efi_kobj for create a /sys/firmware/efi/efivars folder. Cc: Matthew Garrett Cc: H. Peter Anvin Signed-off-by: Lee, Chun-Yi Signed-off-by: Jeremy Kerr Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 9 +++++++++ include/linux/efi.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index d7658b4a5010..6793965b7c8b 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1527,6 +1527,7 @@ void unregister_efivars(struct efivars *efivars) sysfs_remove_bin_file(&efivars->kset->kobj, efivars->del_var); kfree(efivars->new_var); kfree(efivars->del_var); + kobject_put(efivars->kobject); kset_unregister(efivars->kset); } EXPORT_SYMBOL_GPL(unregister_efivars); @@ -1558,6 +1559,14 @@ int register_efivars(struct efivars *efivars, goto out; } + efivars->kobject = kobject_create_and_add("efivars", parent_kobj); + if (!efivars->kobject) { + pr_err("efivars: Subsystem registration failed.\n"); + error = -ENOMEM; + kset_unregister(efivars->kset); + goto out; + } + /* * Per EFI spec, the maximum storage allocated for both * the variable name and variable data is 1024 bytes. diff --git a/include/linux/efi.h b/include/linux/efi.h index b2af1571592b..337aefbfb003 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -662,6 +662,7 @@ struct efivars { spinlock_t lock; struct list_head list; struct kset *kset; + struct kobject *kobject; struct bin_attribute *new_var, *del_var; const struct efivar_operations *ops; struct efivar_entry *walk_entry; -- cgit v1.2.3 From bd52276fa1d420c3a504b76ffaaa1642cc79d4c4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 25 May 2012 16:20:31 +0100 Subject: x86-64/efi: Use EFI to deal with platform wall clock (again) Other than ix86, x86-64 on EFI so far didn't set the {g,s}et_wallclock accessors to the EFI routines, thus incorrectly using raw RTC accesses instead. Simply removing the #ifdef around the respective code isn't enough, however: While so far early get-time calls were done in physical mode, this doesn't work properly for x86-64, as virtual addresses would still need to be set up for all runtime regions (which wasn't the case on the system I have access to), so instead the patch moves the call to efi_enter_virtual_mode() ahead (which in turn allows to drop all code related to calling efi-get-time in physical mode). Additionally the earlier calling of efi_set_executable() requires the CPA code to cope, i.e. during early boot it must be avoided to call cpa_flush_array(), as the first thing this function does is a BUG_ON(irqs_disabled()). Also make the two EFI functions in question here static - they're not being referenced elsewhere. History: This commit was originally merged as bacef661acdb ("x86-64/efi: Use EFI to deal with platform wall clock") but it resulted in some ASUS machines no longer booting due to a firmware bug, and so was reverted in f026cfa82f62. A pre-emptive fix for the buggy ASUS firmware was merged in 03a1c254975e ("x86, efi: 1:1 pagetable mapping for virtual EFI calls") so now this patch can be reapplied. Signed-off-by: Jan Beulich Tested-by: Matt Fleming Acked-by: Matthew Garrett Cc: Ingo Molnar Cc: Peter Zijlstra Cc: H. Peter Anvin Signed-off-by: Matt Fleming [added commit history] --- arch/x86/mm/pageattr.c | 10 ++++++---- arch/x86/platform/efi/efi.c | 30 ++++-------------------------- include/linux/efi.h | 2 -- init/main.c | 8 ++++---- 4 files changed, 14 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d23503..931930a96160 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, /* * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the - * error case we fall back to cpa_flush_all (which uses - * wbindv): + * avoid the wbindv. If the CPU does not support it, in the + * error case, and during early boot (for EFI) we fall back + * to cpa_flush_all (which uses wbinvd): */ - if (!ret && cpu_has_clflush) { + if (early_boot_irqs_disabled) + __cpa_flush_all((void *)(long)cache); + else if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index aded2a91162a..757834434e59 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -235,22 +235,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); - efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -int efi_set_rtc_mmss(unsigned long nowtime) +static int efi_set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes; efi_status_t status; @@ -279,7 +264,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +static unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; @@ -635,18 +620,13 @@ static int __init efi_runtime_init(void) } /* * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map + * EFI runtime service before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; + early_iounmap(runtime, sizeof(efi_runtime_services_t)); return 0; @@ -734,12 +714,10 @@ void __init efi_init(void) efi_enabled = 0; return; } -#ifdef CONFIG_X86_32 if (efi_native) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } -#endif #if EFI_DEBUG print_efi_memmap(); diff --git a/include/linux/efi.h b/include/linux/efi.h index 337aefbfb003..5e2308d9c6be 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -516,8 +516,6 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern unsigned long efi_get_time(void); -extern int efi_set_rtc_mmss(unsigned long nowtime); extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; diff --git a/init/main.c b/init/main.c index 9cf77ab138a6..ae70b647b4d9 100644 --- a/init/main.c +++ b/init/main.c @@ -461,6 +461,10 @@ static void __init mm_init(void) percpu_init_late(); pgtable_cache_init(); vmalloc_init(); +#ifdef CONFIG_X86 + if (efi_enabled) + efi_enter_virtual_mode(); +#endif } asmlinkage void __init start_kernel(void) @@ -601,10 +605,6 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); anon_vma_init(); -#ifdef CONFIG_X86 - if (efi_enabled) - efi_enter_virtual_mode(); -#endif thread_info_cache_init(); cred_init(); fork_init(totalram_pages); -- cgit v1.2.3 From e05ed4d1fad9e730995abb08cb9bc3bffac5018b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 15 Oct 2012 10:19:39 -0700 Subject: swiotlb: Return physical addresses when calling swiotlb_tbl_map_single This change makes it so that swiotlb_tbl_map_single will return a physical address instead of a virtual address when called. The advantage to this once again is that we are avoiding a number of virt_to_phys and phys_to_virt translations by working with everything as a physical address. One change I had to make in order to support using physical addresses is that I could no longer trust 0 to be a invalid physical address on all platforms. So instead I made it so that ~0 is returned on error. This should never be a valid return value as it implies that only one byte would be available for use. In order to clarify things since we now have 2 physical addresses in use inside of swiotlb_tbl_map_single I am renaming phys to orig_addr, and dma_addr to tlb_addr. This way is should be clear that orig_addr is contained within io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer. Signed-off-by: Alexander Duyck Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 22 ++++++------- include/linux/swiotlb.h | 11 +++++-- lib/swiotlb.c | 78 ++++++++++++++++++++++++----------------------- 3 files changed, 59 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 58db6df866ef..8a6035aa69c9 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -338,9 +338,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t phys = page_to_phys(page) + offset; + phys_addr_t map, phys = page_to_phys(page) + offset; dma_addr_t dev_addr = xen_phys_to_bus(phys); - void *map; BUG_ON(dir == DMA_NONE); /* @@ -356,16 +355,16 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * Oh well, have to allocate and map a bounce buffer. */ map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir); - if (!map) + if (map == SWIOTLB_MAP_ERROR) return DMA_ERROR_CODE; - dev_addr = xen_virt_to_bus(map); + dev_addr = xen_phys_to_bus(map); /* * Ensure that the address returned is DMA'ble */ if (!dma_capable(dev, dev_addr, size)) { - swiotlb_tbl_unmap_single(dev, map, size, dir); + swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir); dev_addr = 0; } return dev_addr; @@ -494,11 +493,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { - void *map = swiotlb_tbl_map_single(hwdev, - start_dma_addr, - sg_phys(sg), - sg->length, dir); - if (!map) { + phys_addr_t map = swiotlb_tbl_map_single(hwdev, + start_dma_addr, + sg_phys(sg), + sg->length, + dir); + if (map == SWIOTLB_MAP_ERROR) { /* Don't panic here, we expect map_sg users to do proper error handling. */ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, @@ -506,7 +506,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sgl[0].dma_length = 0; return DMA_ERROR_CODE; } - sg->dma_address = xen_virt_to_bus(map); + sg->dma_address = xen_phys_to_bus(map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 8d08b3ed406d..1995f3e04fed 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -34,9 +34,14 @@ enum dma_sync_target { SYNC_FOR_CPU = 0, SYNC_FOR_DEVICE = 1, }; -extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, - phys_addr_t phys, size_t size, - enum dma_data_direction dir); + +/* define the last possible byte of physical address space as a mapping error */ +#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0) + +extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, + dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir); extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, enum dma_data_direction dir); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index f8c0d4e1d1d3..3adc148bb8d8 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -393,12 +393,13 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, } EXPORT_SYMBOL_GPL(swiotlb_bounce); -void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, - phys_addr_t phys, size_t size, - enum dma_data_direction dir) +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, + dma_addr_t tbl_dma_addr, + phys_addr_t orig_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; - char *dma_addr; + phys_addr_t tlb_addr; unsigned int nslots, stride, index, wrap; int i; unsigned long mask; @@ -462,7 +463,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, io_tlb_list[i] = 0; for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) io_tlb_list[i] = ++count; - dma_addr = (char *)phys_to_virt(io_tlb_start) + (index << IO_TLB_SHIFT); + tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); /* * Update the indices to avoid searching in the next @@ -480,7 +481,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; + return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); @@ -490,11 +491,12 @@ found: * needed. */ for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); + io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size, + DMA_TO_DEVICE); - return dma_addr; + return tlb_addr; } EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); @@ -502,9 +504,8 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); * Allocates bounce buffer and returns its kernel virtual address. */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir) +phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) { dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); @@ -598,12 +599,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ - free_pages((unsigned long) ret, order); - ret = NULL; + if (ret) { + dev_addr = swiotlb_virt_to_bus(hwdev, ret); + if (dev_addr + size - 1 > dma_mask) { + /* + * The allocated memory isn't reachable by the device. + */ + free_pages((unsigned long) ret, order); + ret = NULL; + } } if (!ret) { /* @@ -611,13 +615,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ - ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); - if (!ret) + phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); + if (paddr == SWIOTLB_MAP_ERROR) return NULL; - } - memset(ret, 0, size); - dev_addr = swiotlb_virt_to_bus(hwdev, ret); + ret = phys_to_virt(paddr); + dev_addr = phys_to_dma(hwdev, paddr); + } /* Confirm address can be DMA'd by device */ if (dev_addr + size - 1 > dma_mask) { @@ -629,7 +633,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } + *dma_handle = dev_addr; + memset(ret, 0, size); + return ret; } EXPORT_SYMBOL(swiotlb_alloc_coherent); @@ -686,9 +693,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t phys = page_to_phys(page) + offset; + phys_addr_t map, phys = page_to_phys(page) + offset; dma_addr_t dev_addr = phys_to_dma(dev, phys); - void *map; BUG_ON(dir == DMA_NONE); /* @@ -699,22 +705,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !swiotlb_force) return dev_addr; - /* - * Oh well, have to allocate and map a bounce buffer. - */ + /* Oh well, have to allocate and map a bounce buffer. */ map = map_single(dev, phys, size, dir); - if (!map) { + if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); return phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_virt_to_bus(dev, map); + dev_addr = phys_to_dma(dev, map); - /* - * Ensure that the address returned is DMA'ble - */ + /* Ensure that the address returned is DMA'ble */ if (!dma_capable(dev, dev_addr, size)) { - swiotlb_tbl_unmap_single(dev, map, size, dir); + swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir); return phys_to_dma(dev, io_tlb_overflow_buffer); } @@ -840,9 +842,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length)) { - void *map = map_single(hwdev, sg_phys(sg), - sg->length, dir); - if (!map) { + phys_addr_t map = map_single(hwdev, sg_phys(sg), + sg->length, dir); + if (map == SWIOTLB_MAP_ERROR) { /* Don't panic here, we expect map_sg users to do proper error handling. */ swiotlb_full(hwdev, sg->length, dir, 0); @@ -851,7 +853,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sgl[0].dma_length = 0; return 0; } - sg->dma_address = swiotlb_virt_to_bus(hwdev, map); + sg->dma_address = phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; -- cgit v1.2.3 From 61ca08c3220032dd88815b3465d56cb779258168 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 15 Oct 2012 10:19:44 -0700 Subject: swiotlb: Use physical addresses for swiotlb_tbl_unmap_single This change makes it so that the unmap functionality also uses physical addresses. This helps to further reduce the use of virt_to_phys and phys_to_virt functions. In order to clarify things since we now have 2 physical addresses in use inside of swiotlb_tbl_unmap_single I am renaming phys to orig_addr, and dma_addr to tlb_addr. This way is should be clear that orig_addr is contained within io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer. Signed-off-by: Alexander Duyck Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 4 ++-- include/linux/swiotlb.h | 3 ++- lib/swiotlb.c | 37 +++++++++++++++++++------------------ 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 8a6035aa69c9..4cedc284b5df 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -364,7 +364,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * Ensure that the address returned is DMA'ble */ if (!dma_capable(dev, dev_addr, size)) { - swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir); + swiotlb_tbl_unmap_single(dev, map, size, dir); dev_addr = 0; } return dev_addr; @@ -388,7 +388,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { - swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); return; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 1995f3e04fed..291643c6b88b 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -43,7 +43,8 @@ extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir); -extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, +extern void swiotlb_tbl_unmap_single(struct device *hwdev, + phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir); extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 3adc148bb8d8..d7701dcf407f 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -515,20 +515,20 @@ phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -void -swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT; - phys_addr_t phys = io_tlb_orig_addr[index]; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; /* * First, sync the memory before unmapping the entry */ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), + size, DMA_FROM_DEVICE); /* * Return the buffer to the free list by setting the corresponding @@ -621,17 +621,18 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, ret = phys_to_virt(paddr); dev_addr = phys_to_dma(hwdev, paddr); - } - /* Confirm address can be DMA'd by device */ - if (dev_addr + size - 1 > dma_mask) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); + /* Confirm address can be DMA'd by device */ + if (dev_addr + size - 1 > dma_mask) { + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)dma_mask, + (unsigned long long)dev_addr); - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); - return NULL; + /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + swiotlb_tbl_unmap_single(hwdev, paddr, + size, DMA_TO_DEVICE); + return NULL; + } } *dma_handle = dev_addr; @@ -652,7 +653,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); else /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ - swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -716,7 +717,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* Ensure that the address returned is DMA'ble */ if (!dma_capable(dev, dev_addr, size)) { - swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir); + swiotlb_tbl_unmap_single(dev, map, size, dir); return phys_to_dma(dev, io_tlb_overflow_buffer); } @@ -740,7 +741,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); return; } -- cgit v1.2.3 From fbfda893eb570bbe9e9ad9128b6e9cf2a1e48c87 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 15 Oct 2012 10:19:49 -0700 Subject: swiotlb: Use physical addresses instead of virtual in swiotlb_tbl_sync_single This change makes it so that the sync functionality also uses physical addresses. This helps to further reduce the use of virt_to_phys and phys_to_virt functions. In order to clarify things since we now have 2 physical addresses in use inside of swiotlb_tbl_sync_single I am renaming phys to orig_addr, and dma_addr to tlb_addr. This way is should be clear that orig_addr is contained within io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer. Signed-off-by: Alexander Duyck Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 3 +-- include/linux/swiotlb.h | 3 ++- lib/swiotlb.c | 22 +++++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4cedc284b5df..af47e7594460 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -433,8 +433,7 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { - swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, - target); + swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); return; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 291643c6b88b..e0ac98fd81a9 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -47,7 +47,8 @@ extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir); -extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, +extern void swiotlb_tbl_sync_single(struct device *hwdev, + phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, enum dma_sync_target target); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d7701dcf407f..16a548dc91ac 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -557,26 +557,27 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, } EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -void -swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, - enum dma_data_direction dir, - enum dma_sync_target target) +void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { - int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT; - phys_addr_t phys = io_tlb_orig_addr[index]; + int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = io_tlb_orig_addr[index]; - phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); + orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), + size, DMA_FROM_DEVICE); else BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), + size, DMA_TO_DEVICE); else BUG_ON(dir != DMA_FROM_DEVICE); break; @@ -785,8 +786,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, - target); + swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); return; } -- cgit v1.2.3 From af51a9f1848ff50079a10def56a2c064f326af22 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 15 Oct 2012 10:19:55 -0700 Subject: swiotlb: Do not export swiotlb_bounce since there are no external consumers Currently swiotlb is the only consumer for swiotlb_bounce. Since that is the case it doesn't make much sense to be exporting it so make it a static function only. In addition we can save a few more lines of code by making it so that it accepts the DMA address as a physical address instead of a virtual one. This is the last piece in essentially pushing all of the DMA address values to use physical addresses in swiotlb. In order to clarify things since we now have 2 physical addresses in use inside of swiotlb_bounce I am renaming phys to orig_addr, and dma_addr to tlb_addr. This way is should be clear that orig_addr is contained within io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer. Signed-off-by: Alexander Duyck Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 3 --- lib/swiotlb.c | 35 ++++++++++++++++------------------- 2 files changed, 16 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index e0ac98fd81a9..071d62c214a6 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -53,9 +53,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, enum dma_sync_target target); /* Accessory functions. */ -extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 16a548dc91ac..196b06984dec 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -355,14 +355,15 @@ static int is_swiotlb_buffer(phys_addr_t paddr) /* * Bounce: copy the swiotlb buffer back to the original dma location */ -void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) { - unsigned long pfn = PFN_DOWN(phys); + unsigned long pfn = PFN_DOWN(orig_addr); + unsigned char *vaddr = phys_to_virt(tlb_addr); if (PageHighMem(pfn_to_page(pfn))) { /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = phys & ~PAGE_MASK; + unsigned int offset = orig_addr & ~PAGE_MASK; char *buffer; unsigned int sz = 0; unsigned long flags; @@ -373,25 +374,23 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, local_irq_save(flags); buffer = kmap_atomic(pfn_to_page(pfn)); if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, buffer + offset, sz); + memcpy(vaddr, buffer + offset, sz); else - memcpy(buffer + offset, dma_addr, sz); + memcpy(buffer + offset, vaddr, sz); kunmap_atomic(buffer); local_irq_restore(flags); size -= sz; pfn++; - dma_addr += sz; + vaddr += sz; offset = 0; } + } else if (dir == DMA_TO_DEVICE) { + memcpy(vaddr, phys_to_virt(orig_addr), size); } else { - if (dir == DMA_TO_DEVICE) - memcpy(dma_addr, phys_to_virt(phys), size); - else - memcpy(phys_to_virt(phys), dma_addr, size); + memcpy(phys_to_virt(orig_addr), vaddr, size); } } -EXPORT_SYMBOL_GPL(swiotlb_bounce); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, @@ -493,8 +492,7 @@ found: for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size, - DMA_TO_DEVICE); + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); return tlb_addr; } @@ -526,9 +524,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, /* * First, sync the memory before unmapping the entry */ - if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), - size, DMA_FROM_DEVICE); + if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); /* * Return the buffer to the free list by setting the corresponding @@ -569,14 +566,14 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); else BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); else BUG_ON(dir != DMA_FROM_DEVICE); -- cgit v1.2.3 From dc4dc36056392c0b0b1ca9e81bebff964b9297e0 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 30 Oct 2012 12:34:05 +0530 Subject: spi: tegra: add spi driver for SLINK controller Tegra20/Tegra30 supports the spi interface through its SLINK controller. Add spi driver for SLINK controller. Signed-off-by: Laxman Dewangan Reviewed-by: Stephen Warren Signed-off-by: Mark Brown --- .../bindings/spi/nvidia,tegra20-slink.txt | 26 + drivers/spi/Kconfig | 6 + drivers/spi/Makefile | 2 +- drivers/spi/spi-tegra20-slink.c | 1359 ++++++++++++++++++++ include/linux/spi/spi-tegra.h | 40 + 5 files changed, 1432 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt create mode 100644 drivers/spi/spi-tegra20-slink.c create mode 100644 include/linux/spi/spi-tegra.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt new file mode 100644 index 000000000000..f5b1ad1a1ec3 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt @@ -0,0 +1,26 @@ +NVIDIA Tegra20/Tegra30 SLINK controller. + +Required properties: +- compatible : should be "nvidia,tegra20-slink", "nvidia,tegra30-slink". +- reg: Should contain SLINK registers location and length. +- interrupts: Should contain SLINK interrupts. +- nvidia,dma-request-selector : The Tegra DMA controller's phandle and + request selector for this SLINK controller. + +Recommended properties: +- spi-max-frequency: Definition as per + Documentation/devicetree/bindings/spi/spi-bus.txt + +Example: + +slink@7000d600 { + compatible = "nvidia,tegra20-slink"; + reg = <0x7000d600 0x200>; + interrupts = <0 82 0x04>; + nvidia,dma-request-selector = <&apbdma 16>; + spi-max-frequency = <25000000>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; +}; + diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 1acae359cabe..25290d9780b2 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -385,6 +385,12 @@ config SPI_MXS help SPI driver for Freescale MXS devices. +config SPI_TEGRA20_SLINK + tristate "Nvidia Tegra20/Tegra30 SLINK Controller" + depends on ARCH_TEGRA && TEGRA20_APB_DMA + help + SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface. + config SPI_TI_SSP tristate "TI Sequencer Serial Port - SPI Support" depends on MFD_TI_SSP diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index c48df47e4b0f..f87c0f142e5a 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -60,10 +60,10 @@ obj-$(CONFIG_SPI_SH_MSIOF) += spi-sh-msiof.o obj-$(CONFIG_SPI_SH_SCI) += spi-sh-sci.o obj-$(CONFIG_SPI_SIRF) += spi-sirf.o obj-$(CONFIG_SPI_STMP3XXX) += spi-stmp.o +obj-$(CONFIG_SPI_TEGRA20_SLINK) += spi-tegra20-slink.o obj-$(CONFIG_SPI_TI_SSP) += spi-ti-ssp.o obj-$(CONFIG_SPI_TLE62X0) += spi-tle62x0.o obj-$(CONFIG_SPI_TOPCLIFF_PCH) += spi-topcliff-pch.o obj-$(CONFIG_SPI_TXX9) += spi-txx9.o obj-$(CONFIG_SPI_XCOMM) += spi-xcomm.o obj-$(CONFIG_SPI_XILINX) += spi-xilinx.o - diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c new file mode 100644 index 000000000000..b8985be81d96 --- /dev/null +++ b/drivers/spi/spi-tegra20-slink.c @@ -0,0 +1,1359 @@ +/* + * SPI driver for Nvidia's Tegra20/Tegra30 SLINK Controller. + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SLINK_COMMAND 0x000 +#define SLINK_BIT_LENGTH(x) (((x) & 0x1f) << 0) +#define SLINK_WORD_SIZE(x) (((x) & 0x1f) << 5) +#define SLINK_BOTH_EN (1 << 10) +#define SLINK_CS_SW (1 << 11) +#define SLINK_CS_VALUE (1 << 12) +#define SLINK_CS_POLARITY (1 << 13) +#define SLINK_IDLE_SDA_DRIVE_LOW (0 << 16) +#define SLINK_IDLE_SDA_DRIVE_HIGH (1 << 16) +#define SLINK_IDLE_SDA_PULL_LOW (2 << 16) +#define SLINK_IDLE_SDA_PULL_HIGH (3 << 16) +#define SLINK_IDLE_SDA_MASK (3 << 16) +#define SLINK_CS_POLARITY1 (1 << 20) +#define SLINK_CK_SDA (1 << 21) +#define SLINK_CS_POLARITY2 (1 << 22) +#define SLINK_CS_POLARITY3 (1 << 23) +#define SLINK_IDLE_SCLK_DRIVE_LOW (0 << 24) +#define SLINK_IDLE_SCLK_DRIVE_HIGH (1 << 24) +#define SLINK_IDLE_SCLK_PULL_LOW (2 << 24) +#define SLINK_IDLE_SCLK_PULL_HIGH (3 << 24) +#define SLINK_IDLE_SCLK_MASK (3 << 24) +#define SLINK_M_S (1 << 28) +#define SLINK_WAIT (1 << 29) +#define SLINK_GO (1 << 30) +#define SLINK_ENB (1 << 31) + +#define SLINK_MODES (SLINK_IDLE_SCLK_MASK | SLINK_CK_SDA) + +#define SLINK_COMMAND2 0x004 +#define SLINK_LSBFE (1 << 0) +#define SLINK_SSOE (1 << 1) +#define SLINK_SPIE (1 << 4) +#define SLINK_BIDIROE (1 << 6) +#define SLINK_MODFEN (1 << 7) +#define SLINK_INT_SIZE(x) (((x) & 0x1f) << 8) +#define SLINK_CS_ACTIVE_BETWEEN (1 << 17) +#define SLINK_SS_EN_CS(x) (((x) & 0x3) << 18) +#define SLINK_SS_SETUP(x) (((x) & 0x3) << 20) +#define SLINK_FIFO_REFILLS_0 (0 << 22) +#define SLINK_FIFO_REFILLS_1 (1 << 22) +#define SLINK_FIFO_REFILLS_2 (2 << 22) +#define SLINK_FIFO_REFILLS_3 (3 << 22) +#define SLINK_FIFO_REFILLS_MASK (3 << 22) +#define SLINK_WAIT_PACK_INT(x) (((x) & 0x7) << 26) +#define SLINK_SPC0 (1 << 29) +#define SLINK_TXEN (1 << 30) +#define SLINK_RXEN (1 << 31) + +#define SLINK_STATUS 0x008 +#define SLINK_COUNT(val) (((val) >> 0) & 0x1f) +#define SLINK_WORD(val) (((val) >> 5) & 0x1f) +#define SLINK_BLK_CNT(val) (((val) >> 0) & 0xffff) +#define SLINK_MODF (1 << 16) +#define SLINK_RX_UNF (1 << 18) +#define SLINK_TX_OVF (1 << 19) +#define SLINK_TX_FULL (1 << 20) +#define SLINK_TX_EMPTY (1 << 21) +#define SLINK_RX_FULL (1 << 22) +#define SLINK_RX_EMPTY (1 << 23) +#define SLINK_TX_UNF (1 << 24) +#define SLINK_RX_OVF (1 << 25) +#define SLINK_TX_FLUSH (1 << 26) +#define SLINK_RX_FLUSH (1 << 27) +#define SLINK_SCLK (1 << 28) +#define SLINK_ERR (1 << 29) +#define SLINK_RDY (1 << 30) +#define SLINK_BSY (1 << 31) +#define SLINK_FIFO_ERROR (SLINK_TX_OVF | SLINK_RX_UNF | \ + SLINK_TX_UNF | SLINK_RX_OVF) + +#define SLINK_FIFO_EMPTY (SLINK_TX_EMPTY | SLINK_RX_EMPTY) + +#define SLINK_MAS_DATA 0x010 +#define SLINK_SLAVE_DATA 0x014 + +#define SLINK_DMA_CTL 0x018 +#define SLINK_DMA_BLOCK_SIZE(x) (((x) & 0xffff) << 0) +#define SLINK_TX_TRIG_1 (0 << 16) +#define SLINK_TX_TRIG_4 (1 << 16) +#define SLINK_TX_TRIG_8 (2 << 16) +#define SLINK_TX_TRIG_16 (3 << 16) +#define SLINK_TX_TRIG_MASK (3 << 16) +#define SLINK_RX_TRIG_1 (0 << 18) +#define SLINK_RX_TRIG_4 (1 << 18) +#define SLINK_RX_TRIG_8 (2 << 18) +#define SLINK_RX_TRIG_16 (3 << 18) +#define SLINK_RX_TRIG_MASK (3 << 18) +#define SLINK_PACKED (1 << 20) +#define SLINK_PACK_SIZE_4 (0 << 21) +#define SLINK_PACK_SIZE_8 (1 << 21) +#define SLINK_PACK_SIZE_16 (2 << 21) +#define SLINK_PACK_SIZE_32 (3 << 21) +#define SLINK_PACK_SIZE_MASK (3 << 21) +#define SLINK_IE_TXC (1 << 26) +#define SLINK_IE_RXC (1 << 27) +#define SLINK_DMA_EN (1 << 31) + +#define SLINK_STATUS2 0x01c +#define SLINK_TX_FIFO_EMPTY_COUNT(val) (((val) & 0x3f) >> 0) +#define SLINK_RX_FIFO_FULL_COUNT(val) (((val) & 0x3f0000) >> 16) +#define SLINK_SS_HOLD_TIME(val) (((val) & 0xF) << 6) + +#define SLINK_TX_FIFO 0x100 +#define SLINK_RX_FIFO 0x180 + +#define DATA_DIR_TX (1 << 0) +#define DATA_DIR_RX (1 << 1) + +#define SLINK_DMA_TIMEOUT (msecs_to_jiffies(1000)) + +#define DEFAULT_SPI_DMA_BUF_LEN (16*1024) +#define TX_FIFO_EMPTY_COUNT_MAX SLINK_TX_FIFO_EMPTY_COUNT(0x20) +#define RX_FIFO_FULL_COUNT_ZERO SLINK_RX_FIFO_FULL_COUNT(0) + +#define SLINK_STATUS2_RESET \ + (TX_FIFO_EMPTY_COUNT_MAX | RX_FIFO_FULL_COUNT_ZERO << 16) + +#define MAX_CHIP_SELECT 4 +#define SLINK_FIFO_DEPTH 32 + +struct tegra_slink_chip_data { + bool cs_hold_time; +}; + +struct tegra_slink_data { + struct device *dev; + struct spi_master *master; + const struct tegra_slink_chip_data *chip_data; + spinlock_t lock; + + struct clk *clk; + void __iomem *base; + phys_addr_t phys; + unsigned irq; + int dma_req_sel; + u32 spi_max_frequency; + u32 cur_speed; + + struct spi_device *cur_spi; + unsigned cur_pos; + unsigned cur_len; + unsigned words_per_32bit; + unsigned bytes_per_word; + unsigned curr_dma_words; + unsigned cur_direction; + + unsigned cur_rx_pos; + unsigned cur_tx_pos; + + unsigned dma_buf_size; + unsigned max_buf_size; + bool is_curr_dma_xfer; + bool is_hw_based_cs; + + struct completion rx_dma_complete; + struct completion tx_dma_complete; + + u32 tx_status; + u32 rx_status; + u32 status_reg; + bool is_packed; + unsigned long packed_size; + + u32 command_reg; + u32 command2_reg; + u32 dma_control_reg; + u32 def_command_reg; + u32 def_command2_reg; + + struct completion xfer_completion; + struct spi_transfer *curr_xfer; + struct dma_chan *rx_dma_chan; + u32 *rx_dma_buf; + dma_addr_t rx_dma_phys; + struct dma_async_tx_descriptor *rx_dma_desc; + + struct dma_chan *tx_dma_chan; + u32 *tx_dma_buf; + dma_addr_t tx_dma_phys; + struct dma_async_tx_descriptor *tx_dma_desc; +}; + +static int tegra_slink_runtime_suspend(struct device *dev); +static int tegra_slink_runtime_resume(struct device *dev); + +static inline unsigned long tegra_slink_readl(struct tegra_slink_data *tspi, + unsigned long reg) +{ + return readl(tspi->base + reg); +} + +static inline void tegra_slink_writel(struct tegra_slink_data *tspi, + unsigned long val, unsigned long reg) +{ + writel(val, tspi->base + reg); + + /* Read back register to make sure that register writes completed */ + if (reg != SLINK_TX_FIFO) + readl(tspi->base + SLINK_MAS_DATA); +} + +static void tegra_slink_clear_status(struct tegra_slink_data *tspi) +{ + unsigned long val; + unsigned long val_write = 0; + + val = tegra_slink_readl(tspi, SLINK_STATUS); + + /* Write 1 to clear status register */ + val_write = SLINK_RDY | SLINK_FIFO_ERROR; + tegra_slink_writel(tspi, val_write, SLINK_STATUS); +} + +static unsigned long tegra_slink_get_packed_size(struct tegra_slink_data *tspi, + struct spi_transfer *t) +{ + unsigned long val; + + switch (tspi->bytes_per_word) { + case 0: + val = SLINK_PACK_SIZE_4; + break; + case 1: + val = SLINK_PACK_SIZE_8; + break; + case 2: + val = SLINK_PACK_SIZE_16; + break; + case 4: + val = SLINK_PACK_SIZE_32; + break; + default: + val = 0; + } + return val; +} + +static unsigned tegra_slink_calculate_curr_xfer_param( + struct spi_device *spi, struct tegra_slink_data *tspi, + struct spi_transfer *t) +{ + unsigned remain_len = t->len - tspi->cur_pos; + unsigned max_word; + unsigned bits_per_word ; + unsigned max_len; + unsigned total_fifo_words; + + bits_per_word = t->bits_per_word ? t->bits_per_word : + spi->bits_per_word; + tspi->bytes_per_word = (bits_per_word - 1) / 8 + 1; + + if (bits_per_word == 8 || bits_per_word == 16) { + tspi->is_packed = 1; + tspi->words_per_32bit = 32/bits_per_word; + } else { + tspi->is_packed = 0; + tspi->words_per_32bit = 1; + } + tspi->packed_size = tegra_slink_get_packed_size(tspi, t); + + if (tspi->is_packed) { + max_len = min(remain_len, tspi->max_buf_size); + tspi->curr_dma_words = max_len/tspi->bytes_per_word; + total_fifo_words = max_len/4; + } else { + max_word = (remain_len - 1) / tspi->bytes_per_word + 1; + max_word = min(max_word, tspi->max_buf_size/4); + tspi->curr_dma_words = max_word; + total_fifo_words = max_word; + } + return total_fifo_words; +} + +static unsigned tegra_slink_fill_tx_fifo_from_client_txbuf( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned nbytes; + unsigned tx_empty_count; + unsigned long fifo_status; + unsigned max_n_32bit; + unsigned i, count; + unsigned long x; + unsigned int written_words; + unsigned fifo_words_left; + u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos; + + fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2); + tx_empty_count = SLINK_TX_FIFO_EMPTY_COUNT(fifo_status); + + if (tspi->is_packed) { + fifo_words_left = tx_empty_count * tspi->words_per_32bit; + written_words = min(fifo_words_left, tspi->curr_dma_words); + nbytes = written_words * tspi->bytes_per_word; + max_n_32bit = DIV_ROUND_UP(nbytes, 4); + for (count = 0; count < max_n_32bit; count++) { + x = 0; + for (i = 0; (i < 4) && nbytes; i++, nbytes--) + x |= (*tx_buf++) << (i*8); + tegra_slink_writel(tspi, x, SLINK_TX_FIFO); + } + } else { + max_n_32bit = min(tspi->curr_dma_words, tx_empty_count); + written_words = max_n_32bit; + nbytes = written_words * tspi->bytes_per_word; + for (count = 0; count < max_n_32bit; count++) { + x = 0; + for (i = 0; nbytes && (i < tspi->bytes_per_word); + i++, nbytes--) + x |= ((*tx_buf++) << i*8); + tegra_slink_writel(tspi, x, SLINK_TX_FIFO); + } + } + tspi->cur_tx_pos += written_words * tspi->bytes_per_word; + return written_words; +} + +static unsigned int tegra_slink_read_rx_fifo_to_client_rxbuf( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned rx_full_count; + unsigned long fifo_status; + unsigned i, count; + unsigned long x; + unsigned int read_words = 0; + unsigned len; + u8 *rx_buf = (u8 *)t->rx_buf + tspi->cur_rx_pos; + + fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2); + rx_full_count = SLINK_RX_FIFO_FULL_COUNT(fifo_status); + if (tspi->is_packed) { + len = tspi->curr_dma_words * tspi->bytes_per_word; + for (count = 0; count < rx_full_count; count++) { + x = tegra_slink_readl(tspi, SLINK_RX_FIFO); + for (i = 0; len && (i < 4); i++, len--) + *rx_buf++ = (x >> i*8) & 0xFF; + } + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; + read_words += tspi->curr_dma_words; + } else { + unsigned int bits_per_word; + + bits_per_word = t->bits_per_word ? t->bits_per_word : + tspi->cur_spi->bits_per_word; + for (count = 0; count < rx_full_count; count++) { + x = tegra_slink_readl(tspi, SLINK_RX_FIFO); + for (i = 0; (i < tspi->bytes_per_word); i++) + *rx_buf++ = (x >> (i*8)) & 0xFF; + } + tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word; + read_words += rx_full_count; + } + return read_words; +} + +static void tegra_slink_copy_client_txbuf_to_spi_txbuf( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned len; + + /* Make the dma buffer to read by cpu */ + dma_sync_single_for_cpu(tspi->dev, tspi->tx_dma_phys, + tspi->dma_buf_size, DMA_TO_DEVICE); + + if (tspi->is_packed) { + len = tspi->curr_dma_words * tspi->bytes_per_word; + memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len); + } else { + unsigned int i; + unsigned int count; + u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos; + unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word; + unsigned int x; + + for (count = 0; count < tspi->curr_dma_words; count++) { + x = 0; + for (i = 0; consume && (i < tspi->bytes_per_word); + i++, consume--) + x |= ((*tx_buf++) << i * 8); + tspi->tx_dma_buf[count] = x; + } + } + tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word; + + /* Make the dma buffer to read by dma */ + dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys, + tspi->dma_buf_size, DMA_TO_DEVICE); +} + +static void tegra_slink_copy_spi_rxbuf_to_client_rxbuf( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned len; + + /* Make the dma buffer to read by cpu */ + dma_sync_single_for_cpu(tspi->dev, tspi->rx_dma_phys, + tspi->dma_buf_size, DMA_FROM_DEVICE); + + if (tspi->is_packed) { + len = tspi->curr_dma_words * tspi->bytes_per_word; + memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len); + } else { + unsigned int i; + unsigned int count; + unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos; + unsigned int x; + unsigned int rx_mask, bits_per_word; + + bits_per_word = t->bits_per_word ? t->bits_per_word : + tspi->cur_spi->bits_per_word; + rx_mask = (1 << bits_per_word) - 1; + for (count = 0; count < tspi->curr_dma_words; count++) { + x = tspi->rx_dma_buf[count]; + x &= rx_mask; + for (i = 0; (i < tspi->bytes_per_word); i++) + *rx_buf++ = (x >> (i*8)) & 0xFF; + } + } + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; + + /* Make the dma buffer to read by dma */ + dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, + tspi->dma_buf_size, DMA_FROM_DEVICE); +} + +static void tegra_slink_dma_complete(void *args) +{ + struct completion *dma_complete = args; + + complete(dma_complete); +} + +static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len) +{ + INIT_COMPLETION(tspi->tx_dma_complete); + tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan, + tspi->tx_dma_phys, len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tspi->tx_dma_desc) { + dev_err(tspi->dev, "Not able to get desc for Tx\n"); + return -EIO; + } + + tspi->tx_dma_desc->callback = tegra_slink_dma_complete; + tspi->tx_dma_desc->callback_param = &tspi->tx_dma_complete; + + dmaengine_submit(tspi->tx_dma_desc); + dma_async_issue_pending(tspi->tx_dma_chan); + return 0; +} + +static int tegra_slink_start_rx_dma(struct tegra_slink_data *tspi, int len) +{ + INIT_COMPLETION(tspi->rx_dma_complete); + tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan, + tspi->rx_dma_phys, len, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tspi->rx_dma_desc) { + dev_err(tspi->dev, "Not able to get desc for Rx\n"); + return -EIO; + } + + tspi->rx_dma_desc->callback = tegra_slink_dma_complete; + tspi->rx_dma_desc->callback_param = &tspi->rx_dma_complete; + + dmaengine_submit(tspi->rx_dma_desc); + dma_async_issue_pending(tspi->rx_dma_chan); + return 0; +} + +static int tegra_slink_start_dma_based_transfer( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned long val; + unsigned long test_val; + unsigned int len; + int ret = 0; + unsigned long status; + + /* Make sure that Rx and Tx fifo are empty */ + status = tegra_slink_readl(tspi, SLINK_STATUS); + if ((status & SLINK_FIFO_EMPTY) != SLINK_FIFO_EMPTY) { + dev_err(tspi->dev, + "Rx/Tx fifo are not empty status 0x%08lx\n", status); + return -EIO; + } + + val = SLINK_DMA_BLOCK_SIZE(tspi->curr_dma_words - 1); + val |= tspi->packed_size; + if (tspi->is_packed) + len = DIV_ROUND_UP(tspi->curr_dma_words * tspi->bytes_per_word, + 4) * 4; + else + len = tspi->curr_dma_words * 4; + + /* Set attention level based on length of transfer */ + if (len & 0xF) + val |= SLINK_TX_TRIG_1 | SLINK_RX_TRIG_1; + else if (((len) >> 4) & 0x1) + val |= SLINK_TX_TRIG_4 | SLINK_RX_TRIG_4; + else + val |= SLINK_TX_TRIG_8 | SLINK_RX_TRIG_8; + + if (tspi->cur_direction & DATA_DIR_TX) + val |= SLINK_IE_TXC; + + if (tspi->cur_direction & DATA_DIR_RX) + val |= SLINK_IE_RXC; + + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + tspi->dma_control_reg = val; + + if (tspi->cur_direction & DATA_DIR_TX) { + tegra_slink_copy_client_txbuf_to_spi_txbuf(tspi, t); + wmb(); + ret = tegra_slink_start_tx_dma(tspi, len); + if (ret < 0) { + dev_err(tspi->dev, + "Starting tx dma failed, err %d\n", ret); + return ret; + } + + /* Wait for tx fifo to be fill before starting slink */ + test_val = tegra_slink_readl(tspi, SLINK_STATUS); + while (!(test_val & SLINK_TX_FULL)) + test_val = tegra_slink_readl(tspi, SLINK_STATUS); + } + + if (tspi->cur_direction & DATA_DIR_RX) { + /* Make the dma buffer to read by dma */ + dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, + tspi->dma_buf_size, DMA_FROM_DEVICE); + + ret = tegra_slink_start_rx_dma(tspi, len); + if (ret < 0) { + dev_err(tspi->dev, + "Starting rx dma failed, err %d\n", ret); + if (tspi->cur_direction & DATA_DIR_TX) + dmaengine_terminate_all(tspi->tx_dma_chan); + return ret; + } + } + tspi->is_curr_dma_xfer = true; + if (tspi->is_packed) { + val |= SLINK_PACKED; + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + /* HW need small delay after settign Packed mode */ + udelay(1); + } + tspi->dma_control_reg = val; + + val |= SLINK_DMA_EN; + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + return ret; +} + +static int tegra_slink_start_cpu_based_transfer( + struct tegra_slink_data *tspi, struct spi_transfer *t) +{ + unsigned long val; + unsigned cur_words; + + val = tspi->packed_size; + if (tspi->cur_direction & DATA_DIR_TX) + val |= SLINK_IE_TXC; + + if (tspi->cur_direction & DATA_DIR_RX) + val |= SLINK_IE_RXC; + + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + tspi->dma_control_reg = val; + + if (tspi->cur_direction & DATA_DIR_TX) + cur_words = tegra_slink_fill_tx_fifo_from_client_txbuf(tspi, t); + else + cur_words = tspi->curr_dma_words; + val |= SLINK_DMA_BLOCK_SIZE(cur_words - 1); + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + tspi->dma_control_reg = val; + + tspi->is_curr_dma_xfer = false; + if (tspi->is_packed) { + val |= SLINK_PACKED; + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + udelay(1); + wmb(); + } + tspi->dma_control_reg = val; + val |= SLINK_DMA_EN; + tegra_slink_writel(tspi, val, SLINK_DMA_CTL); + return 0; +} + +static int tegra_slink_init_dma_param(struct tegra_slink_data *tspi, + bool dma_to_memory) +{ + struct dma_chan *dma_chan; + u32 *dma_buf; + dma_addr_t dma_phys; + int ret; + struct dma_slave_config dma_sconfig; + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + dma_chan = dma_request_channel(mask, NULL, NULL); + if (!dma_chan) { + dev_err(tspi->dev, + "Dma channel is not available, will try later\n"); + return -EPROBE_DEFER; + } + + dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size, + &dma_phys, GFP_KERNEL); + if (!dma_buf) { + dev_err(tspi->dev, " Not able to allocate the dma buffer\n"); + dma_release_channel(dma_chan); + return -ENOMEM; + } + + dma_sconfig.slave_id = tspi->dma_req_sel; + if (dma_to_memory) { + dma_sconfig.src_addr = tspi->phys + SLINK_RX_FIFO; + dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.src_maxburst = 0; + } else { + dma_sconfig.dst_addr = tspi->phys + SLINK_TX_FIFO; + dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.dst_maxburst = 0; + } + + ret = dmaengine_slave_config(dma_chan, &dma_sconfig); + if (ret) + goto scrub; + if (dma_to_memory) { + tspi->rx_dma_chan = dma_chan; + tspi->rx_dma_buf = dma_buf; + tspi->rx_dma_phys = dma_phys; + } else { + tspi->tx_dma_chan = dma_chan; + tspi->tx_dma_buf = dma_buf; + tspi->tx_dma_phys = dma_phys; + } + return 0; + +scrub: + dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys); + dma_release_channel(dma_chan); + return ret; +} + +static void tegra_slink_deinit_dma_param(struct tegra_slink_data *tspi, + bool dma_to_memory) +{ + u32 *dma_buf; + dma_addr_t dma_phys; + struct dma_chan *dma_chan; + + if (dma_to_memory) { + dma_buf = tspi->rx_dma_buf; + dma_chan = tspi->rx_dma_chan; + dma_phys = tspi->rx_dma_phys; + tspi->rx_dma_chan = NULL; + tspi->rx_dma_buf = NULL; + } else { + dma_buf = tspi->tx_dma_buf; + dma_chan = tspi->tx_dma_chan; + dma_phys = tspi->tx_dma_phys; + tspi->tx_dma_buf = NULL; + tspi->tx_dma_chan = NULL; + } + if (!dma_chan) + return; + + dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys); + dma_release_channel(dma_chan); +} + +static int tegra_slink_start_transfer_one(struct spi_device *spi, + struct spi_transfer *t, bool is_first_of_msg, + bool is_single_xfer) +{ + struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master); + u32 speed; + u8 bits_per_word; + unsigned total_fifo_words; + int ret; + struct tegra_spi_device_controller_data *cdata = spi->controller_data; + unsigned long command; + unsigned long command2; + + bits_per_word = t->bits_per_word ? t->bits_per_word : + spi->bits_per_word; + speed = t->speed_hz ? t->speed_hz : spi->max_speed_hz; + if (!speed) + speed = tspi->spi_max_frequency; + if (speed != tspi->cur_speed) { + clk_set_rate(tspi->clk, speed * 4); + tspi->cur_speed = speed; + } + + tspi->cur_spi = spi; + tspi->cur_pos = 0; + tspi->cur_rx_pos = 0; + tspi->cur_tx_pos = 0; + tspi->curr_xfer = t; + total_fifo_words = tegra_slink_calculate_curr_xfer_param(spi, tspi, t); + + if (is_first_of_msg) { + tegra_slink_clear_status(tspi); + + command = tspi->def_command_reg; + command |= SLINK_BIT_LENGTH(bits_per_word - 1); + + command2 = tspi->def_command2_reg; + command2 |= SLINK_SS_EN_CS(spi->chip_select); + + /* possibly use the hw based chip select */ + tspi->is_hw_based_cs = false; + if (cdata && cdata->is_hw_based_cs && is_single_xfer && + ((tspi->curr_dma_words * tspi->bytes_per_word) == + (t->len - tspi->cur_pos))) { + int setup_count; + int sts2; + + setup_count = cdata->cs_setup_clk_count >> 1; + setup_count = max(setup_count, 3); + command2 |= SLINK_SS_SETUP(setup_count); + if (tspi->chip_data->cs_hold_time) { + int hold_count; + + hold_count = cdata->cs_hold_clk_count; + hold_count = max(hold_count, 0xF); + sts2 = tegra_slink_readl(tspi, SLINK_STATUS2); + sts2 &= ~SLINK_SS_HOLD_TIME(0xF); + sts2 |= SLINK_SS_HOLD_TIME(hold_count); + tegra_slink_writel(tspi, sts2, SLINK_STATUS2); + } + tspi->is_hw_based_cs = true; + } + + if (tspi->is_hw_based_cs) + command &= ~SLINK_CS_SW; + else + command |= SLINK_CS_SW | SLINK_CS_VALUE; + + command &= ~SLINK_MODES; + if (spi->mode & SPI_CPHA) + command |= SLINK_CK_SDA; + + if (spi->mode & SPI_CPOL) + command |= SLINK_IDLE_SCLK_DRIVE_HIGH; + else + command |= SLINK_IDLE_SCLK_DRIVE_LOW; + } else { + command = tspi->command_reg; + command &= ~SLINK_BIT_LENGTH(~0); + command |= SLINK_BIT_LENGTH(bits_per_word - 1); + + command2 = tspi->command2_reg; + command2 &= ~(SLINK_RXEN | SLINK_TXEN); + } + + tegra_slink_writel(tspi, command, SLINK_COMMAND); + tspi->command_reg = command; + + tspi->cur_direction = 0; + if (t->rx_buf) { + command2 |= SLINK_RXEN; + tspi->cur_direction |= DATA_DIR_RX; + } + if (t->tx_buf) { + command2 |= SLINK_TXEN; + tspi->cur_direction |= DATA_DIR_TX; + } + tegra_slink_writel(tspi, command2, SLINK_COMMAND2); + tspi->command2_reg = command2; + + if (total_fifo_words > SLINK_FIFO_DEPTH) + ret = tegra_slink_start_dma_based_transfer(tspi, t); + else + ret = tegra_slink_start_cpu_based_transfer(tspi, t); + return ret; +} + +static int tegra_slink_setup(struct spi_device *spi) +{ + struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master); + unsigned long val; + unsigned long flags; + int ret; + unsigned int cs_pol_bit[MAX_CHIP_SELECT] = { + SLINK_CS_POLARITY, + SLINK_CS_POLARITY1, + SLINK_CS_POLARITY2, + SLINK_CS_POLARITY3, + }; + + dev_dbg(&spi->dev, "setup %d bpw, %scpol, %scpha, %dHz\n", + spi->bits_per_word, + spi->mode & SPI_CPOL ? "" : "~", + spi->mode & SPI_CPHA ? "" : "~", + spi->max_speed_hz); + + BUG_ON(spi->chip_select >= MAX_CHIP_SELECT); + + ret = pm_runtime_get_sync(tspi->dev); + if (ret < 0) { + dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret); + return ret; + } + + spin_lock_irqsave(&tspi->lock, flags); + val = tspi->def_command_reg; + if (spi->mode & SPI_CS_HIGH) + val |= cs_pol_bit[spi->chip_select]; + else + val &= ~cs_pol_bit[spi->chip_select]; + tspi->def_command_reg = val; + tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); + spin_unlock_irqrestore(&tspi->lock, flags); + + pm_runtime_put(tspi->dev); + return 0; +} + +static int tegra_slink_prepare_transfer(struct spi_master *master) +{ + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + + return pm_runtime_get_sync(tspi->dev); +} + +static int tegra_slink_unprepare_transfer(struct spi_master *master) +{ + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + + pm_runtime_put(tspi->dev); + return 0; +} + +static int tegra_slink_transfer_one_message(struct spi_master *master, + struct spi_message *msg) +{ + bool is_first_msg = true; + int single_xfer; + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + struct spi_transfer *xfer; + struct spi_device *spi = msg->spi; + int ret; + + msg->status = 0; + msg->actual_length = 0; + single_xfer = list_is_singular(&msg->transfers); + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + INIT_COMPLETION(tspi->xfer_completion); + ret = tegra_slink_start_transfer_one(spi, xfer, + is_first_msg, single_xfer); + if (ret < 0) { + dev_err(tspi->dev, + "spi can not start transfer, err %d\n", ret); + goto exit; + } + is_first_msg = false; + ret = wait_for_completion_timeout(&tspi->xfer_completion, + SLINK_DMA_TIMEOUT); + if (WARN_ON(ret == 0)) { + dev_err(tspi->dev, + "spi trasfer timeout, err %d\n", ret); + ret = -EIO; + goto exit; + } + + if (tspi->tx_status || tspi->rx_status) { + dev_err(tspi->dev, "Error in Transfer\n"); + ret = -EIO; + goto exit; + } + msg->actual_length += xfer->len; + if (xfer->cs_change && xfer->delay_usecs) { + tegra_slink_writel(tspi, tspi->def_command_reg, + SLINK_COMMAND); + udelay(xfer->delay_usecs); + } + } + ret = 0; +exit: + tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); + tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2); + msg->status = ret; + spi_finalize_current_message(master); + return ret; +} + +static irqreturn_t handle_cpu_based_xfer(struct tegra_slink_data *tspi) +{ + struct spi_transfer *t = tspi->curr_xfer; + unsigned long flags; + + spin_lock_irqsave(&tspi->lock, flags); + if (tspi->tx_status || tspi->rx_status || + (tspi->status_reg & SLINK_BSY)) { + dev_err(tspi->dev, + "CpuXfer ERROR bit set 0x%x\n", tspi->status_reg); + dev_err(tspi->dev, + "CpuXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg, + tspi->command2_reg, tspi->dma_control_reg); + tegra_periph_reset_assert(tspi->clk); + udelay(2); + tegra_periph_reset_deassert(tspi->clk); + complete(&tspi->xfer_completion); + goto exit; + } + + if (tspi->cur_direction & DATA_DIR_RX) + tegra_slink_read_rx_fifo_to_client_rxbuf(tspi, t); + + if (tspi->cur_direction & DATA_DIR_TX) + tspi->cur_pos = tspi->cur_tx_pos; + else + tspi->cur_pos = tspi->cur_rx_pos; + + if (tspi->cur_pos == t->len) { + complete(&tspi->xfer_completion); + goto exit; + } + + tegra_slink_calculate_curr_xfer_param(tspi->cur_spi, tspi, t); + tegra_slink_start_cpu_based_transfer(tspi, t); +exit: + spin_unlock_irqrestore(&tspi->lock, flags); + return IRQ_HANDLED; +} + +static irqreturn_t handle_dma_based_xfer(struct tegra_slink_data *tspi) +{ + struct spi_transfer *t = tspi->curr_xfer; + long wait_status; + int err = 0; + unsigned total_fifo_words; + unsigned long flags; + + /* Abort dmas if any error */ + if (tspi->cur_direction & DATA_DIR_TX) { + if (tspi->tx_status) { + dmaengine_terminate_all(tspi->tx_dma_chan); + err += 1; + } else { + wait_status = wait_for_completion_interruptible_timeout( + &tspi->tx_dma_complete, SLINK_DMA_TIMEOUT); + if (wait_status <= 0) { + dmaengine_terminate_all(tspi->tx_dma_chan); + dev_err(tspi->dev, "TxDma Xfer failed\n"); + err += 1; + } + } + } + + if (tspi->cur_direction & DATA_DIR_RX) { + if (tspi->rx_status) { + dmaengine_terminate_all(tspi->rx_dma_chan); + err += 2; + } else { + wait_status = wait_for_completion_interruptible_timeout( + &tspi->rx_dma_complete, SLINK_DMA_TIMEOUT); + if (wait_status <= 0) { + dmaengine_terminate_all(tspi->rx_dma_chan); + dev_err(tspi->dev, "RxDma Xfer failed\n"); + err += 2; + } + } + } + + spin_lock_irqsave(&tspi->lock, flags); + if (err) { + dev_err(tspi->dev, + "DmaXfer: ERROR bit set 0x%x\n", tspi->status_reg); + dev_err(tspi->dev, + "DmaXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg, + tspi->command2_reg, tspi->dma_control_reg); + tegra_periph_reset_assert(tspi->clk); + udelay(2); + tegra_periph_reset_deassert(tspi->clk); + complete(&tspi->xfer_completion); + spin_unlock_irqrestore(&tspi->lock, flags); + return IRQ_HANDLED; + } + + if (tspi->cur_direction & DATA_DIR_RX) + tegra_slink_copy_spi_rxbuf_to_client_rxbuf(tspi, t); + + if (tspi->cur_direction & DATA_DIR_TX) + tspi->cur_pos = tspi->cur_tx_pos; + else + tspi->cur_pos = tspi->cur_rx_pos; + + if (tspi->cur_pos == t->len) { + complete(&tspi->xfer_completion); + goto exit; + } + + /* Continue transfer in current message */ + total_fifo_words = tegra_slink_calculate_curr_xfer_param(tspi->cur_spi, + tspi, t); + if (total_fifo_words > SLINK_FIFO_DEPTH) + err = tegra_slink_start_dma_based_transfer(tspi, t); + else + err = tegra_slink_start_cpu_based_transfer(tspi, t); + +exit: + spin_unlock_irqrestore(&tspi->lock, flags); + return IRQ_HANDLED; +} + +static irqreturn_t tegra_slink_isr_thread(int irq, void *context_data) +{ + struct tegra_slink_data *tspi = context_data; + + if (!tspi->is_curr_dma_xfer) + return handle_cpu_based_xfer(tspi); + return handle_dma_based_xfer(tspi); +} + +static irqreturn_t tegra_slink_isr(int irq, void *context_data) +{ + struct tegra_slink_data *tspi = context_data; + + tspi->status_reg = tegra_slink_readl(tspi, SLINK_STATUS); + if (tspi->cur_direction & DATA_DIR_TX) + tspi->tx_status = tspi->status_reg & + (SLINK_TX_OVF | SLINK_TX_UNF); + + if (tspi->cur_direction & DATA_DIR_RX) + tspi->rx_status = tspi->status_reg & + (SLINK_RX_OVF | SLINK_RX_UNF); + tegra_slink_clear_status(tspi); + + return IRQ_WAKE_THREAD; +} + +static struct tegra_spi_platform_data *tegra_slink_parse_dt( + struct platform_device *pdev) +{ + struct tegra_spi_platform_data *pdata; + const unsigned int *prop; + struct device_node *np = pdev->dev.of_node; + u32 of_dma[2]; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(&pdev->dev, "Memory alloc for pdata failed\n"); + return NULL; + } + + if (of_property_read_u32_array(np, "nvidia,dma-request-selector", + of_dma, 2) >= 0) + pdata->dma_req_sel = of_dma[1]; + + prop = of_get_property(np, "spi-max-frequency", NULL); + if (prop) + pdata->spi_max_frequency = be32_to_cpup(prop); + + return pdata; +} + +const struct tegra_slink_chip_data tegra30_spi_cdata = { + .cs_hold_time = true, +}; + +const struct tegra_slink_chip_data tegra20_spi_cdata = { + .cs_hold_time = false, +}; + +static struct of_device_id tegra_slink_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra20-slink", .data = &tegra20_spi_cdata, }, + { .compatible = "nvidia,tegra30-slink", .data = &tegra30_spi_cdata, }, + {} +}; +MODULE_DEVICE_TABLE(of, tegra_slink_of_match); + +static int __devinit tegra_slink_probe(struct platform_device *pdev) +{ + struct spi_master *master; + struct tegra_slink_data *tspi; + struct resource *r; + struct tegra_spi_platform_data *pdata = pdev->dev.platform_data; + int ret, spi_irq; + const struct tegra_slink_chip_data *cdata = NULL; + const struct of_device_id *match; + + match = of_match_device(of_match_ptr(tegra_slink_of_match), &pdev->dev); + if (!match) { + dev_err(&pdev->dev, "Error: No device match found\n"); + return -ENODEV; + } + cdata = match->data; + if (!pdata && pdev->dev.of_node) + pdata = tegra_slink_parse_dt(pdev); + + if (!pdata) { + dev_err(&pdev->dev, "No platform data, exiting\n"); + return -ENODEV; + } + + if (!pdata->spi_max_frequency) + pdata->spi_max_frequency = 25000000; /* 25MHz */ + + master = spi_alloc_master(&pdev->dev, sizeof(*tspi)); + if (!master) { + dev_err(&pdev->dev, "master allocation failed\n"); + return -ENOMEM; + } + + /* the spi->mode bits understood by this driver: */ + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->setup = tegra_slink_setup; + master->prepare_transfer_hardware = tegra_slink_prepare_transfer; + master->transfer_one_message = tegra_slink_transfer_one_message; + master->unprepare_transfer_hardware = tegra_slink_unprepare_transfer; + master->num_chipselect = MAX_CHIP_SELECT; + master->bus_num = -1; + + dev_set_drvdata(&pdev->dev, master); + tspi = spi_master_get_devdata(master); + tspi->master = master; + tspi->dma_req_sel = pdata->dma_req_sel; + tspi->dev = &pdev->dev; + tspi->chip_data = cdata; + spin_lock_init(&tspi->lock); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(&pdev->dev, "No IO memory resource\n"); + ret = -ENODEV; + goto exit_free_master; + } + tspi->phys = r->start; + tspi->base = devm_request_and_ioremap(&pdev->dev, r); + if (!tspi->base) { + dev_err(&pdev->dev, + "Cannot request memregion/iomap dma address\n"); + ret = -EADDRNOTAVAIL; + goto exit_free_master; + } + + spi_irq = platform_get_irq(pdev, 0); + tspi->irq = spi_irq; + ret = request_threaded_irq(tspi->irq, tegra_slink_isr, + tegra_slink_isr_thread, IRQF_ONESHOT, + dev_name(&pdev->dev), tspi); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", + tspi->irq); + goto exit_free_master; + } + + tspi->clk = devm_clk_get(&pdev->dev, "slink"); + if (IS_ERR(tspi->clk)) { + dev_err(&pdev->dev, "can not get clock\n"); + ret = PTR_ERR(tspi->clk); + goto exit_free_irq; + } + + tspi->max_buf_size = SLINK_FIFO_DEPTH << 2; + tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN; + tspi->spi_max_frequency = pdata->spi_max_frequency; + + if (pdata->dma_req_sel) { + ret = tegra_slink_init_dma_param(tspi, true); + if (ret < 0) { + dev_err(&pdev->dev, "RxDma Init failed, err %d\n", ret); + goto exit_free_irq; + } + + ret = tegra_slink_init_dma_param(tspi, false); + if (ret < 0) { + dev_err(&pdev->dev, "TxDma Init failed, err %d\n", ret); + goto exit_rx_dma_free; + } + tspi->max_buf_size = tspi->dma_buf_size; + init_completion(&tspi->tx_dma_complete); + init_completion(&tspi->rx_dma_complete); + } + + init_completion(&tspi->xfer_completion); + + pm_runtime_enable(&pdev->dev); + if (!pm_runtime_enabled(&pdev->dev)) { + ret = tegra_slink_runtime_resume(&pdev->dev); + if (ret) + goto exit_pm_disable; + } + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret); + goto exit_pm_disable; + } + tspi->def_command_reg = SLINK_M_S; + tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN; + tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); + tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2); + pm_runtime_put(&pdev->dev); + + master->dev.of_node = pdev->dev.of_node; + ret = spi_register_master(master); + if (ret < 0) { + dev_err(&pdev->dev, "can not register to master err %d\n", ret); + goto exit_pm_disable; + } + return ret; + +exit_pm_disable: + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + tegra_slink_runtime_suspend(&pdev->dev); + tegra_slink_deinit_dma_param(tspi, false); +exit_rx_dma_free: + tegra_slink_deinit_dma_param(tspi, true); +exit_free_irq: + free_irq(spi_irq, tspi); +exit_free_master: + spi_master_put(master); + return ret; +} + +static int __devexit tegra_slink_remove(struct platform_device *pdev) +{ + struct spi_master *master = dev_get_drvdata(&pdev->dev); + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + + free_irq(tspi->irq, tspi); + spi_unregister_master(master); + + if (tspi->tx_dma_chan) + tegra_slink_deinit_dma_param(tspi, false); + + if (tspi->rx_dma_chan) + tegra_slink_deinit_dma_param(tspi, true); + + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + tegra_slink_runtime_suspend(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int tegra_slink_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + + return spi_master_suspend(master); +} + +static int tegra_slink_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + int ret; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm runtime failed, e = %d\n", ret); + return ret; + } + tegra_slink_writel(tspi, tspi->command_reg, SLINK_COMMAND); + tegra_slink_writel(tspi, tspi->command2_reg, SLINK_COMMAND2); + pm_runtime_put(dev); + + return spi_master_resume(master); +} +#endif + +static int tegra_slink_runtime_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + + /* Flush all write which are in PPSB queue by reading back */ + tegra_slink_readl(tspi, SLINK_MAS_DATA); + + clk_disable_unprepare(tspi->clk); + return 0; +} + +static int tegra_slink_runtime_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct tegra_slink_data *tspi = spi_master_get_devdata(master); + int ret; + + ret = clk_prepare_enable(tspi->clk); + if (ret < 0) { + dev_err(tspi->dev, "clk_prepare failed: %d\n", ret); + return ret; + } + return 0; +} + +static const struct dev_pm_ops slink_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_slink_runtime_suspend, + tegra_slink_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_slink_suspend, tegra_slink_resume) +}; +static struct platform_driver tegra_slink_driver = { + .driver = { + .name = "spi-tegra-slink", + .owner = THIS_MODULE, + .pm = &slink_pm_ops, + .of_match_table = of_match_ptr(tegra_slink_of_match), + }, + .probe = tegra_slink_probe, + .remove = __devexit_p(tegra_slink_remove), +}; +module_platform_driver(tegra_slink_driver); + +MODULE_ALIAS("platform:spi-tegra-slink"); +MODULE_DESCRIPTION("NVIDIA Tegra20/Tegra30 SLINK Controller Driver"); +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/spi/spi-tegra.h b/include/linux/spi/spi-tegra.h new file mode 100644 index 000000000000..786932c62edb --- /dev/null +++ b/include/linux/spi/spi-tegra.h @@ -0,0 +1,40 @@ +/* + * spi-tegra.h: SPI interface for Nvidia Tegra20 SLINK controller. + * + * Copyright (C) 2011 NVIDIA Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_SPI_TEGRA_H +#define _LINUX_SPI_TEGRA_H + +struct tegra_spi_platform_data { + int dma_req_sel; + unsigned int spi_max_frequency; +}; + +/* + * Controller data from device to pass some info like + * hw based chip select can be used or not and if yes + * then CS hold and setup time. + */ +struct tegra_spi_device_controller_data { + bool is_hw_based_cs; + int cs_setup_clk_count; + int cs_hold_clk_count; +}; + +#endif /* _LINUX_SPI_TEGRA_H */ -- cgit v1.2.3 From 242860a47a75b933a79a30f6a40bf4858f4a3ecc Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 19 Oct 2012 09:33:12 -0300 Subject: mm/sl[aou]b: Move common kmem_cache_size() to slab.h This function is identically defined in all three allocators and it's trivial to move it to slab.h Since now it's static, inline, header-defined function this patch also drops the EXPORT_SYMBOL tag. Cc: Pekka Enberg Cc: Matt Mackall Acked-by: Christoph Lameter Signed-off-by: Ezequiel Garcia Signed-off-by: Pekka Enberg --- include/linux/slab.h | 9 ++++++++- mm/slab.c | 6 ------ mm/slob.c | 6 ------ mm/slub.c | 9 --------- 4 files changed, 8 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 83d1a1454b7e..743a10415122 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); -unsigned int kmem_cache_size(struct kmem_cache *); /* * Please use this macro to create slab caches. Simply specify the @@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +/* + * Determine the size of a slab object + */ +static inline unsigned int kmem_cache_size(struct kmem_cache *s) +{ + return s->object_size; +} + void __init kmem_cache_init_late(void); #endif /* _LINUX_SLAB_H */ diff --git a/mm/slab.c b/mm/slab.c index 6d5c83c6ddd5..1f7fd5f51f87 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3969,12 +3969,6 @@ void kfree(const void *objp) } EXPORT_SYMBOL(kfree); -unsigned int kmem_cache_size(struct kmem_cache *cachep) -{ - return cachep->object_size; -} -EXPORT_SYMBOL(kmem_cache_size); - /* * This initializes kmem_list3 or resizes various caches for all nodes. */ diff --git a/mm/slob.c b/mm/slob.c index 287a88aa4a61..fffbc820774d 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -604,12 +604,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b) } EXPORT_SYMBOL(kmem_cache_free); -unsigned int kmem_cache_size(struct kmem_cache *c) -{ - return c->object_size; -} -EXPORT_SYMBOL(kmem_cache_size); - int __kmem_cache_shutdown(struct kmem_cache *c) { /* No way to check for remaining objects */ diff --git a/mm/slub.c b/mm/slub.c index 35483e0ab6bc..deee7c754a7d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3121,15 +3121,6 @@ error: return -EINVAL; } -/* - * Determine the size of a slab object - */ -unsigned int kmem_cache_size(struct kmem_cache *s) -{ - return s->object_size; -} -EXPORT_SYMBOL(kmem_cache_size); - static void list_slab_objects(struct kmem_cache *s, struct page *page, const char *text) { -- cgit v1.2.3 From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 25 Oct 2012 23:34:42 -0500 Subject: rbd: get parent spec for version 2 images Add support for getting the the information identifying the parent image for rbd images that have them. The child image holds a reference to its parent image specification structure. Create a new entry "parent" in /sys/bus/rbd/image/N/ to report the identifying information for the parent image, if any. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- Documentation/ABI/testing/sysfs-bus-rbd | 4 + drivers/block/rbd.c | 131 ++++++++++++++++++++++++++++++++ include/linux/ceph/rados.h | 2 + 3 files changed, 137 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd index 1cf2adf46b11..cd9213ccf3dc 100644 --- a/Documentation/ABI/testing/sysfs-bus-rbd +++ b/Documentation/ABI/testing/sysfs-bus-rbd @@ -70,6 +70,10 @@ snap_* A directory per each snapshot +parent + + Information identifying the pool, image, and snapshot id for + the parent image in a layered rbd image (format 2 only). Entries under /sys/bus/rbd/devices//snap_ ------------------------------------------------------------- diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 28052ff679ca..bce1fcfb5185 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -217,6 +217,9 @@ struct rbd_device { struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + struct rbd_spec *parent_spec; + u64 parent_overlap; + /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -2009,6 +2012,49 @@ static ssize_t rbd_snap_show(struct device *dev, return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); } +/* + * For an rbd v2 image, shows the pool id, image id, and snapshot id + * for the parent image. If there is no parent, simply shows + * "(no parent image)". + */ +static ssize_t rbd_parent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + struct rbd_spec *spec = rbd_dev->parent_spec; + int count; + char *bufp = buf; + + if (!spec) + return sprintf(buf, "(no parent image)\n"); + + count = sprintf(bufp, "pool_id %llu\npool_name %s\n", + (unsigned long long) spec->pool_id, spec->pool_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, + spec->image_name ? spec->image_name : "(unknown)"); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", + (unsigned long long) spec->snap_id, spec->snap_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); + if (count < 0) + return count; + bufp += count; + + return (ssize_t) (bufp - buf); +} + static ssize_t rbd_image_refresh(struct device *dev, struct device_attribute *attr, const char *buf, @@ -2032,6 +2078,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); +static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); static struct attribute *rbd_attrs[] = { &dev_attr_size.attr, @@ -2043,6 +2090,7 @@ static struct attribute *rbd_attrs[] = { &dev_attr_name.attr, &dev_attr_image_id.attr, &dev_attr_current_snap.attr, + &dev_attr_parent.attr, &dev_attr_refresh.attr, NULL }; @@ -2192,6 +2240,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, static void rbd_dev_destroy(struct rbd_device *rbd_dev) { + rbd_spec_put(rbd_dev->parent_spec); kfree(rbd_dev->header_name); rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); @@ -2400,6 +2449,71 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) &rbd_dev->header.features); } +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +{ + struct rbd_spec *parent_spec; + size_t size; + void *reply_buf = NULL; + __le64 snapid; + void *p; + void *end; + char *image_id; + u64 overlap; + size_t len = 0; + int ret; + + parent_spec = rbd_spec_alloc(); + if (!parent_spec) + return -ENOMEM; + + size = sizeof (__le64) + /* pool_id */ + sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ + sizeof (__le64) + /* snap_id */ + sizeof (__le64); /* overlap */ + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) { + ret = -ENOMEM; + goto out_err; + } + + snapid = cpu_to_le64(CEPH_NOSNAP); + ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + "rbd", "get_parent", + (char *) &snapid, sizeof (snapid), + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + if (ret < 0) + goto out_err; + + ret = -ERANGE; + p = reply_buf; + end = (char *) reply_buf + size; + ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); + if (parent_spec->pool_id == CEPH_NOPOOL) + goto out; /* No parent? No problem. */ + + image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_id)) { + ret = PTR_ERR(image_id); + goto out_err; + } + parent_spec->image_id = image_id; + ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); + ceph_decode_64_safe(&p, end, overlap, out_err); + + rbd_dev->parent_overlap = overlap; + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ +out: + ret = 0; +out_err: + kfree(reply_buf); + rbd_spec_put(parent_spec); + + return ret; +} + static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) { size_t size; @@ -3154,6 +3268,12 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) ret = rbd_read_header(rbd_dev, &rbd_dev->header); if (ret < 0) goto out_err; + + /* Version 1 images have no parent (no layering) */ + + rbd_dev->parent_spec = NULL; + rbd_dev->parent_overlap = 0; + rbd_dev->image_format = 1; dout("discovered version 1 image, header name is %s\n", @@ -3205,6 +3325,14 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; + /* If the image supports layering, get the parent info */ + + if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret < 0) + goto out_err; + } + /* crypto and compression type aren't (yet) supported for v2 images */ rbd_dev->header.crypt_type = 0; @@ -3224,6 +3352,9 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) return 0; out_err: + rbd_dev->parent_overlap = 0; + rbd_spec_put(rbd_dev->parent_spec); + rbd_dev->parent_spec = NULL; kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; kfree(rbd_dev->header.object_prefix); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 0a99099801a4..15077db662ed 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -87,6 +87,8 @@ struct ceph_pg { * * lpgp_num -- as above. */ +#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ + #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 #define CEPH_PG_POOL_VERSION 2 -- cgit v1.2.3 From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: libceph: define ceph_pg_pool_name_by_id() Define and export function ceph_pg_pool_name_by_id() to supply the name of a pg pool whose id is given. This will be used by the next patch. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osdmap.h | 1 + net/ceph/osdmap.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e88a620b9f8a..5ea57ba69320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); +extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); #endif diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f552aa48fd9e..de73214b5d26 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) +{ + struct ceph_pg_pool_info *pi; + + if (id == CEPH_NOPOOL) + return NULL; + + if (WARN_ON_ONCE(id > (u64) INT_MAX)) + return NULL; + + pi = __lookup_pg_pool(&map->pg_pools, (int) id); + + return pi ? pi->name : NULL; +} +EXPORT_SYMBOL(ceph_pg_pool_name_by_id); + int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) { struct rb_node *rbp; -- cgit v1.2.3 From 3b98c0c2093d1f92e5b7394ae0b13d142e7ef880 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 12:49:34 +0100 Subject: drbd: switch configuration interface from connector to genetlink Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 3 +- drivers/block/drbd/drbd_int.h | 36 +- drivers/block/drbd/drbd_main.c | 27 +- drivers/block/drbd/drbd_nl.c | 1536 +++++++++++++++++++------------------- drivers/block/drbd/drbd_state.c | 7 +- include/linux/drbd.h | 35 +- include/linux/genl_magic_func.h | 2 +- 7 files changed, 806 insertions(+), 840 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7cd78617669b..c1a90616776b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -702,6 +702,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); struct drbd_conf *mdev = w->mdev; + struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) @@ -725,7 +726,7 @@ static int w_update_odbm(struct drbd_work *w, int unused) break; } } - drbd_bcast_sync_progress(mdev); + drbd_bcast_event(mdev, &sib); return 1; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e68758344647..429fd8da6b71 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "drbd_state.h" @@ -65,7 +66,6 @@ extern unsigned int minor_count; extern int disable_sendpage; extern int allow_oos; -extern unsigned int cn_idx; #ifdef CONFIG_DRBD_FAULT_INJECTION extern int enable_faults; @@ -865,14 +865,6 @@ struct drbd_md { */ }; -/* for sync_conf and other types... */ -#define NL_PACKET(name, number, fields) struct name { fields }; -#define NL_INTEGER(pn,pr,member) int member; -#define NL_INT64(pn,pr,member) __u64 member; -#define NL_BIT(pn,pr,member) unsigned member:1; -#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; -#include "linux/drbd_nl.h" - struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; @@ -1502,7 +1494,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, extern void drbd_free_mdev(struct drbd_conf *mdev); extern void drbd_delete_device(unsigned int minor); -struct drbd_tconn *drbd_new_tconn(char *name); +struct drbd_tconn *drbd_new_tconn(const char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); struct drbd_tconn *conn_by_name(const char *name); @@ -1679,16 +1671,22 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); - /* drbd_nl.c */ - -void drbd_nl_cleanup(void); -int __init drbd_nl_init(void); -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); -void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, - const char *, const struct drbd_peer_request *); - +/* state info broadcast */ +struct sib_info { + enum drbd_state_info_bcast_reason sib_reason; + union { + struct { + char *helper_name; + unsigned helper_exit_code; + }; + struct { + union drbd_state os; + union drbd_state ns; + }; + }; +}; +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib); /* * inline helper functions diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9f6db5947c65..9697ab872098 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -86,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); module_param(minor_count, uint, 0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); -module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); #ifdef CONFIG_DRBD_FAULT_INJECTION @@ -108,7 +107,6 @@ module_param(fault_devs, int, 0644); unsigned int minor_count = DRBD_MINOR_COUNT_DEF; int disable_sendpage; int allow_oos; -unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ /* Module parameter for setting the user mode helper program @@ -2175,7 +2173,7 @@ static void drbd_cleanup(void) if (drbd_proc) remove_proc_entry("drbd", NULL); - drbd_nl_cleanup(); + drbd_genl_unregister(); idr_for_each_entry(&minors, mdev, i) drbd_delete_device(i); @@ -2237,6 +2235,9 @@ struct drbd_tconn *conn_by_name(const char *name) { struct drbd_tconn *tconn; + if (!name || !name[0]) + return NULL; + write_lock_irq(&global_state_lock); list_for_each_entry(tconn, &drbd_tconns, all_tconn) { if (!strcmp(tconn->name, name)) @@ -2248,7 +2249,7 @@ found: return tconn; } -struct drbd_tconn *drbd_new_tconn(char *name) +struct drbd_tconn *drbd_new_tconn(const char *name) { struct drbd_tconn *tconn; @@ -2333,6 +2334,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->tconn = tconn; mdev->minor = minor; + mdev->vnr = vnr; drbd_init_set_defaults(mdev); @@ -2461,10 +2463,6 @@ int __init drbd_init(void) #endif } - err = drbd_nl_init(); - if (err) - return err; - err = register_blkdev(DRBD_MAJOR, "drbd"); if (err) { printk(KERN_ERR @@ -2473,6 +2471,13 @@ int __init drbd_init(void) return err; } + err = drbd_genl_register(); + if (err) { + printk(KERN_ERR "drbd: unable to register generic netlink family\n"); + goto fail; + } + + register_reboot_notifier(&drbd_notifier); /* @@ -2487,12 +2492,12 @@ int __init drbd_init(void) err = drbd_create_mempools(); if (err) - goto Enomem; + goto fail; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); - goto Enomem; + goto fail; } rwlock_init(&global_state_lock); @@ -2507,7 +2512,7 @@ int __init drbd_init(void) return 0; /* Success! */ -Enomem: +fail: drbd_cleanup(); if (err == -ENOMEM) /* currently always the case */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f2739fd188a0..f9be14248e33 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -29,110 +29,225 @@ #include #include #include -#include #include #include #include "drbd_int.h" #include "drbd_req.h" #include "drbd_wrappers.h" #include -#include #include -#include #include -static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); -static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); -static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); - -/* see get_sb_bdev and bd_claim */ +#include + +/* .doit */ +// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); +// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); +/* .dumpit */ +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); + +#include +#include + +/* used blkdev_get_by_path, to claim our meta data device(s) */ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; -/* Generate the tag_list to struct functions */ -#define NL_PACKET(name, number, fields) \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -static int name ## _from_tags( \ - unsigned short *tags, struct name *arg) \ -{ \ - int tag; \ - int dlen; \ - \ - while ((tag = get_unaligned(tags++)) != TT_END) { \ - dlen = get_unaligned(tags++); \ - switch (tag_number(tag)) { \ - fields \ - default: \ - if (tag & T_MANDATORY) { \ - printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \ - return 0; \ - } \ - } \ - tags = (unsigned short *)((char *)tags + dlen); \ - } \ - return 1; \ +/* Configuration is strictly serialized, because generic netlink message + * processing is strictly serialized by the genl_lock(). + * Which means we can use one static global drbd_config_context struct. + */ +static struct drbd_config_context { + /* assigned from drbd_genlmsghdr */ + unsigned int minor; + /* assigned from request attributes, if present */ + unsigned int volume; +#define VOLUME_UNSPECIFIED (-1U) + /* pointer into the request skb, + * limited lifetime! */ + char *conn_name; + + /* reply buffer */ + struct sk_buff *reply_skb; + /* pointer into reply buffer */ + struct drbd_genlmsghdr *reply_dh; + /* resolved from attributes, if possible */ + struct drbd_conf *mdev; + struct drbd_tconn *tconn; +} adm_ctx; + +static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) +{ + genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); + if (genlmsg_reply(skb, info)) + printk(KERN_ERR "drbd: error sending genl reply\n"); } -#define NL_INTEGER(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ - arg->member = get_unaligned((int *)(tags)); \ - break; -#define NL_INT64(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ - arg->member = get_unaligned((u64 *)(tags)); \ - break; -#define NL_BIT(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ - arg->member = *(char *)(tags) ? 1 : 0; \ - break; -#define NL_STRING(pn, pr, member, len) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ - if (dlen > len) { \ - printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \ - #member, dlen, (unsigned int)len); \ - return 0; \ - } \ - arg->member ## _len = dlen; \ - memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ - break; -#include "linux/drbd_nl.h" - -/* Generate the struct to tag_list functions */ -#define NL_PACKET(name, number, fields) \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ -static unsigned short* \ -name ## _to_tags( \ - struct name *arg, unsigned short *tags) \ -{ \ - fields \ - return tags; \ + +/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only + * reason it could fail was no space in skb, and there are 4k available. */ +static int drbd_msg_put_info(const char *info) +{ + struct sk_buff *skb = adm_ctx.reply_skb; + struct nlattr *nla; + int err = -EMSGSIZE; + + if (!info || !info[0]) + return 0; + + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + err = nla_put_string(skb, T_info_text, info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } else + nla_nest_end(skb, nla); + return 0; } -#define NL_INTEGER(pn, pr, member) \ - put_unaligned(pn | pr | TT_INTEGER, tags++); \ - put_unaligned(sizeof(int), tags++); \ - put_unaligned(arg->member, (int *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(int)); -#define NL_INT64(pn, pr, member) \ - put_unaligned(pn | pr | TT_INT64, tags++); \ - put_unaligned(sizeof(u64), tags++); \ - put_unaligned(arg->member, (u64 *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(u64)); -#define NL_BIT(pn, pr, member) \ - put_unaligned(pn | pr | TT_BIT, tags++); \ - put_unaligned(sizeof(char), tags++); \ - *(char *)tags = arg->member; \ - tags = (unsigned short *)((char *)tags+sizeof(char)); -#define NL_STRING(pn, pr, member, len) \ - put_unaligned(pn | pr | TT_STRING, tags++); \ - put_unaligned(arg->member ## _len, tags++); \ - memcpy(tags, arg->member, arg->member ## _len); \ - tags = (unsigned short *)((char *)tags + arg->member ## _len); -#include "linux/drbd_nl.h" - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); -void drbd_nl_send_reply(struct cn_msg *, int); +/* This would be a good candidate for a "pre_doit" hook, + * and per-family private info->pointers. + * But we need to stay compatible with older kernels. + * If it returns successfully, adm_ctx members are valid. + */ +#define DRBD_ADM_NEED_MINOR 1 +#define DRBD_ADM_NEED_CONN 2 +static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, + unsigned flags) +{ + struct drbd_genlmsghdr *d_in = info->userhdr; + const u8 cmd = info->genlhdr->cmd; + int err; + + memset(&adm_ctx, 0, sizeof(adm_ctx)); + + /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ + if (cmd != DRBD_ADM_GET_STATUS + && security_netlink_recv(skb, CAP_SYS_ADMIN)) + return -EPERM; + + adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!adm_ctx.reply_skb) + goto fail; + + adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, + info, &drbd_genl_family, 0, cmd); + /* put of a few bytes into a fresh skb of >= 4k will always succeed. + * but anyways */ + if (!adm_ctx.reply_dh) + goto fail; + + adm_ctx.reply_dh->minor = d_in->minor; + adm_ctx.reply_dh->ret_code = NO_ERROR; + + if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { + struct nlattr *nla; + /* parse and validate only */ + err = drbd_cfg_context_from_attrs(NULL, info->attrs); + if (err) + goto fail; + + /* It was present, and valid, + * copy it over to the reply skb. */ + err = nla_put_nohdr(adm_ctx.reply_skb, + info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, + info->attrs[DRBD_NLA_CFG_CONTEXT]); + if (err) + goto fail; + + /* and assign stuff to the global adm_ctx */ + nla = nested_attr_tb[__nla_type(T_ctx_volume)]; + adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; + nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + if (nla) + adm_ctx.conn_name = nla_data(nla); + } else + adm_ctx.volume = VOLUME_UNSPECIFIED; + + adm_ctx.minor = d_in->minor; + adm_ctx.mdev = minor_to_mdev(d_in->minor); + adm_ctx.tconn = conn_by_name(adm_ctx.conn_name); + + if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { + drbd_msg_put_info("unknown minor"); + return ERR_MINOR_INVALID; + } + if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + + /* some more paranoia, if the request was over-determined */ + if (adm_ctx.mdev && + adm_ctx.volume != VOLUME_UNSPECIFIED && + adm_ctx.volume != adm_ctx.mdev->vnr) { + pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", + adm_ctx.minor, adm_ctx.volume, + adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev && adm_ctx.tconn && + adm_ctx.mdev->tconn != adm_ctx.tconn) { + pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", + adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("over-determined configuration context mismatch"); + return ERR_INVALID_REQUEST; + } + return NO_ERROR; + +fail: + nlmsg_free(adm_ctx.reply_skb); + adm_ctx.reply_skb = NULL; + return -ENOMEM; +} + +static int drbd_adm_finish(struct genl_info *info, int retcode) +{ + struct nlattr *nla; + const char *conn_name = NULL; + + if (!adm_ctx.reply_skb) + return -ENOMEM; + + adm_ctx.reply_dh->ret_code = retcode; + + nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; + if (nla) { + nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + if (nla) + conn_name = nla_data(nla); + } + + drbd_adm_send_reply(adm_ctx.reply_skb, info); + return 0; +} int drbd_khelper(struct drbd_conf *mdev, char *cmd) { @@ -142,9 +257,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) NULL, /* Will be set to address family */ NULL, /* Will be set to address */ NULL }; - char mb[12], af[20], ad[60], *afs; char *argv[] = {usermode_helper, cmd, mb, NULL }; + struct sib_info sib; int ret; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); @@ -177,8 +292,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) drbd_md_sync(mdev); dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); - - drbd_bcast_ev_helper(mdev, cmd); + sib.sib_reason = SIB_HELPER_PRE; + sib.helper_name = cmd; + drbd_bcast_event(mdev, &sib); ret = call_usermodehelper(usermode_helper, argv, envp, 1); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", @@ -188,6 +304,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); + sib.sib_reason = SIB_HELPER_POST; + sib.helper_exit_code = ret; + drbd_bcast_event(mdev, &sib); if (ret < 0) /* Ignore any ERRNOs we got. */ ret = 0; @@ -362,7 +481,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (rv == SS_NOTHING_TO_DO) - goto fail; + goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { nps = drbd_try_outdate_peer(mdev); @@ -388,13 +507,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) rv = _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); if (rv < SS_SUCCESS) - goto fail; + goto out; } break; } if (rv < SS_SUCCESS) - goto fail; + goto out; if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); @@ -438,33 +557,46 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) drbd_md_sync(mdev); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); - fail: +out: mutex_unlock(mdev->state_mutex); return rv; } -static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static const char *from_attrs_err_to_txt(int err) { - struct primary primary_args; - - memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(nlp->tag_list, &primary_args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force); - - return 0; + return err == -ENOMSG ? "required attribute missing" : + err == -EOPNOTSUPP ? "unknown mandatory attribute" : + "invalid attribute value"; } -static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); + struct set_role_parms parms; + int err; + enum drbd_ret_code retcode; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { + err = set_role_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) + retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate); + else + retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0); +out: + drbd_adm_finish(info, retcode); return 0; } @@ -541,6 +673,12 @@ char *ppsize(char *buf, unsigned long long size) * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ +/* Note these are not to be confused with + * drbd_adm_suspend_io/drbd_adm_resume_io, + * which are (sub) state changes triggered by admin (drbdsetup), + * and can be long lived. + * This changes an mdev->flag, is triggered by drbd internals, + * and should be short-lived. */ void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); @@ -881,11 +1019,10 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } -/* does always return 0; - * interesting return code is in reply->ret_code */ -static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int err; enum drbd_ret_code retcode; enum determine_dev_size dd; sector_t max_possible_sectors; @@ -897,6 +1034,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp enum drbd_state_rv rv; int cp_discovered = 0; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + + mdev = adm_ctx.mdev; conn_reconfig_start(mdev->tconn); /* if you want to reconfigure, please tear down first */ @@ -910,7 +1054,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); - /* allocation not in the IO path, cqueue thread context */ + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; @@ -922,12 +1066,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) { + err = disk_conf_from_attrs(&nbc->dc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -961,7 +1107,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - (nbc->dc.meta_dev_idx < 0) ? + ((int)nbc->dc.meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -997,7 +1143,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - if (nbc->dc.meta_dev_idx < 0) { + if ((int)nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1028,7 +1174,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if (nbc->dc.meta_dev_idx >= 0) + if ((int)nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } @@ -1242,8 +1388,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); put_ldev(mdev); - reply->ret_code = retcode; conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; force_diskless_dec: @@ -1251,6 +1397,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless: drbd_force_state(mdev, NS(disk, D_FAILED)); drbd_md_sync(mdev); + conn_reconfig_done(mdev->tconn); fail: if (nbc) { if (nbc->backing_bdev) @@ -1263,8 +1410,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } lc_destroy(resync_lru); - reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; } @@ -1273,42 +1419,54 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * Then we transition to D_DISKLESS, and wait for put_ldev() to return all * internal references as well. * Only then we have finally detached. */ -static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; enum drbd_ret_code retcode; - int ret; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); - /* D_FAILED will transition to DISKLESS. */ - ret = wait_event_interruptible(mdev->misc_wait, - mdev->state.disk != D_FAILED); + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); drbd_resume_io(mdev); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - int i; - enum drbd_ret_code retcode; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + struct drbd_conf *mdev; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct drbd_conf *mdev; - char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; struct drbd_tconn *oconn; + struct drbd_tconn *tconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + enum drbd_ret_code retcode; + int i; + int err; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; conn_reconfig_start(tconn); if (tconn->cstate > C_STANDALONE) { @@ -1343,8 +1501,10 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - if (!net_conf_from_tags(nlp->tag_list, new_conf)) { + err = net_conf_from_attrs(new_conf, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1495,8 +1655,8 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl mdev->recv_cnt = 0; kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); } - reply->ret_code = retcode; conn_reconfig_done(tconn); + drbd_adm_finish(info, retcode); return 0; fail: @@ -1508,24 +1668,37 @@ fail: crypto_free_hash(integrity_r_tfm); kfree(new_conf); - reply->ret_code = retcode; conn_reconfig_done(tconn); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { - int retcode; - struct disconnect dc; + struct disconnect_parms parms; + struct drbd_tconn *tconn; + enum drbd_ret_code retcode; + int err; - memset(&dc, 0, sizeof(struct disconnect)); - if (!disconnect_from_tags(nlp->tag_list, &dc)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + tconn = adm_ctx.tconn; + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { + err = disconnect_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } } - if (dc.force) { + if (parms.force_disconnect) { spin_lock_irq(&tconn->req_lock); if (tconn->cstate >= C_WF_CONNECTION) _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); @@ -1567,7 +1740,7 @@ static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req * done: retcode = NO_ERROR; fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } @@ -1587,20 +1760,32 @@ void resync_after_online_grow(struct drbd_conf *mdev) _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } -static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) { - struct resize rs; - int retcode = NO_ERROR; + struct resize_parms rs; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; enum determine_dev_size dd; enum dds_flags ddsf; + int err; - memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(nlp->tag_list, &rs)) { - retcode = ERR_MANDATORY_TAG; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) goto fail; + + memset(&rs, 0, sizeof(struct resize_parms)); + if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { + err = resize_parms_from_attrs(&rs, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } } + mdev = adm_ctx.mdev; if (mdev->state.conn > C_CONNECTED) { retcode = ERR_RESIZE_RESYNC; goto fail; @@ -1644,14 +1829,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } fail: - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ -1662,12 +1847,21 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n int *rs_plan_s = NULL; int fifo_size; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + mdev = adm_ctx.mdev; + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; + drbd_msg_put_info("unable to allocate cpumask"); goto fail; } - if (nlp->flags & DRBD_NL_SET_DEFAULTS) { + if (((struct drbd_genlmsghdr*)info->userhdr)->flags + & DRBD_GENL_F_SET_DEFAULTS) { memset(&sc, 0, sizeof(struct syncer_conf)); sc.rate = DRBD_RATE_DEF; sc.after = DRBD_AFTER_DEF; @@ -1681,8 +1875,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); - if (!syncer_conf_from_tags(nlp->tag_list, &sc)) { + err = syncer_conf_from_attrs(&sc, info->attrs); + if (err) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } @@ -1832,14 +2028,23 @@ fail: free_cpumask_var(new_cpu_mask); crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); - reply->ret_code = retcode; + + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) { - int retcode; + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. */ @@ -1862,7 +2067,8 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } @@ -1875,56 +2081,58 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev) return rv; } -static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, + union drbd_state mask, union drbd_state val) { - int retcode; - - /* If there is still bitmap IO pending, probably because of a previous - * resync just being finished, wait for it before requesting a new resync. */ - wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + enum drbd_ret_code retcode; - retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); - - if (retcode < SS_SUCCESS) { - if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { - /* The peer will get a resync upon connect anyways. Just make that - into a full resync. */ - retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); - if (retcode >= SS_SUCCESS) { - if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, - "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) - retcode = ERR_IO_MD_DISK; - } - } else - retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); - } + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - reply->ret_code = retcode; + retcode = drbd_request_state(adm_ctx.mdev, mask, val); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S)); +} - if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) - retcode = ERR_PAUSE_IS_SET; +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - reply->ret_code = retcode; + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_SET; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; union drbd_state s; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { - s = mdev->state; + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = adm_ctx.mdev->state; if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; @@ -1933,28 +2141,35 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); - - return 0; + return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); } -static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) { + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } drbd_suspend_io(mdev); - reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); - if (reply->ret_code == SS_SUCCESS) { + retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); + if (retcode == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) tl_clear(mdev->tconn); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) @@ -1962,138 +2177,259 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } drbd_resume_io(mdev); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); - return 0; + return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, + const struct sib_info *sib) { - unsigned short *tl; - - tl = reply->tag_list; - - if (get_ldev(mdev)) { - tl = disk_conf_to_tags(&mdev->ldev->dc, tl); - put_ldev(mdev); + struct state_info *si = NULL; /* for sizeof(si->member); */ + struct nlattr *nla; + int got_ldev; + int got_net; + int err = 0; + int exclude_sensitive; + + /* If sib != NULL, this is drbd_bcast_event, which anyone can listen + * to. So we better exclude_sensitive information. + * + * If sib == NULL, this is drbd_adm_get_status, executed synchronously + * in the context of the requesting user process. Exclude sensitive + * information, unless current has superuser. + * + * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and + * relies on the current implementation of netlink_dump(), which + * executes the dump callback successively from netlink_recvmsg(), + * always in the context of the receiving process */ + exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); + + got_ldev = get_ldev(mdev); + got_net = get_net_conf(mdev->tconn); + + /* We need to add connection name and volume number information still. + * Minor number is in drbd_genlmsghdr. */ + nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr); + NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name); + nla_nest_end(skb, nla); + + if (got_ldev) + if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) + goto nla_put_failure; + if (got_net) + if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) + goto nla_put_failure; + + if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) + goto nla_put_failure; + + nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); + NLA_PUT_U32(skb, T_current_state, mdev->state.i); + NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); + NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + + if (got_ldev) { + NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); + NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); + NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); + NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (C_SYNC_SOURCE <= mdev->state.conn && + C_PAUSED_SYNC_T >= mdev->state.conn) { + NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); + NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + } } - if (get_net_conf(mdev->tconn)) { - tl = net_conf_to_tags(mdev->tconn->net_conf, tl); - put_net_conf(mdev->tconn); + if (sib) { + switch(sib->sib_reason) { + case SIB_SYNC_PROGRESS: + case SIB_GET_STATUS_REPLY: + break; + case SIB_STATE_CHANGE: + NLA_PUT_U32(skb, T_prev_state, sib->os.i); + NLA_PUT_U32(skb, T_new_state, sib->ns.i); + break; + case SIB_HELPER_POST: + NLA_PUT_U32(skb, + T_helper_exit_code, sib->helper_exit_code); + /* fall through */ + case SIB_HELPER_PRE: + NLA_PUT_STRING(skb, T_helper, sib->helper_name); + break; + } } - tl = syncer_conf_to_tags(&mdev->sync_conf, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ + nla_nest_end(skb, nla); - return (int)((char *)tl - (char *)reply->tag_list); + if (0) +nla_put_failure: + err = -EMSGSIZE; + if (got_ldev) + put_ldev(mdev); + if (got_net) + put_net_conf(mdev->tconn); + return err; } -static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl = reply->tag_list; - union drbd_state s = mdev->state; - unsigned long rs_left; - unsigned int res; + enum drbd_ret_code retcode; + int err; - tl = get_state_to_tags((struct get_state *)&s, tl); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - /* no local ref, no bitmap, no syncer progress. */ - if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { - if (get_ldev(mdev)) { - drbd_get_syncer_progress(mdev, &rs_left, &res); - tl = tl_add_int(tl, T_sync_progress, &res); - put_ldev(mdev); - } + err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); +out: + drbd_adm_finish(info, retcode); + return 0; } -static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { - unsigned short *tl; - - tl = reply->tag_list; + struct drbd_conf *mdev; + struct drbd_genlmsghdr *dh; + int minor = cb->args[0]; + + /* Open coded deferred single idr_for_each_entry iteration. + * This may miss entries inserted after this dump started, + * or entries deleted before they are reached. + * But we need to make sure the mdev won't disappear while + * we are looking at it. */ + + rcu_read_lock(); + mdev = idr_get_next(&minors, &minor); + if (mdev) { + dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, &drbd_genl_family, + NLM_F_MULTI, DRBD_ADM_GET_STATUS); + if (!dh) + goto errout; + + D_ASSERT(mdev->minor == minor); + + dh->minor = minor; + dh->ret_code = NO_ERROR; + + if (nla_put_status_info(skb, mdev, NULL)) { + genlmsg_cancel(skb, dh); + goto errout; + } + genlmsg_end(skb, dh); + } - if (get_ldev(mdev)) { - tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); - tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); - put_ldev(mdev); - } - put_unaligned(TT_END, tl++); /* Close the tag list */ +errout: + rcu_read_unlock(); + /* where to start idr_get_next with the next iteration */ + cb->args[0] = minor+1; - return (int)((char *)tl - (char *)reply->tag_list); + /* No more minors found: empty skb. Which will terminate the dump. */ + return skb->len; } -/** - * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use - * @mdev: DRBD device. - * @nlp: Netlink/connector packet from drbdsetup - * @reply: Reply packet for drbdsetup - */ -static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl; - char rv; - - tl = reply->tag_list; + enum drbd_ret_code retcode; + struct timeout_parms tp; + int err; - rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); - put_unaligned(TT_END, tl++); /* Close the tag list */ + tp.timeout_type = + adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED : + UT_DEFAULT; - return (int)((char *)tl - (char *)reply->tag_list); + err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; + } +out: + drbd_adm_finish(info, retcode); + return 0; } -static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { - /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; - if (!start_ov_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + mdev = adm_ctx.mdev; + if (info->attrs[DRBD_NLA_START_OV_PARMS]) { + /* resume from last known position, if possible */ + struct start_ov_parms parms = + { .ov_start_sector = mdev->ov_start_sector }; + int err = start_ov_parms_from_attrs(&parms, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; + } /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. */ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); - - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; - reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); +out: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; int skip_initial_sync = 0; int err; + struct new_c_uuid_parms args; - struct new_c_uuid args; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out_nolock; - memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + mdev = adm_ctx.mdev; + memset(&args, 0, sizeof(args)); + if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { + err = new_c_uuid_parms_from_attrs(&args, info->attrs); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out_nolock; + } } mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */ @@ -2139,510 +2475,164 @@ out_dec: put_ldev(mdev); out: mutex_unlock(mdev->state_mutex); - - reply->ret_code = retcode; - return 0; -} - -static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - struct new_connection args; - - if (!new_connection_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = NO_ERROR; - if (!drbd_new_tconn(args.name)) - reply->ret_code = ERR_NOMEM; - - return 0; -} - -static int drbd_nl_new_minor(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - struct new_minor args; - - args.vol_nr = 0; - args.minor = 0; - - if (!new_minor_from_tags(nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); - +out_nolock: + drbd_adm_finish(info, retcode); return 0; } -static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static enum drbd_ret_code +drbd_check_conn_name(const char *name) { - if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); - reply->ret_code = NO_ERROR; - } else { - reply->ret_code = ERR_MINOR_CONFIGURED; + if (!name || !name[0]) { + drbd_msg_put_info("connection name missing"); + return ERR_MANDATORY_TAG; } - return 0; -} - -static int drbd_nl_del_conn(struct drbd_tconn *tconn, - struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) -{ - if (conn_lowest_minor(tconn) < 0) { - drbd_free_tconn(tconn); - reply->ret_code = NO_ERROR; - } else { - reply->ret_code = ERR_CONN_IN_USE; + /* if we want to use these in sysfs/configfs/debugfs some day, + * we must not allow slashes */ + if (strchr(name, '/')) { + drbd_msg_put_info("invalid connection name"); + return ERR_INVALID_REQUEST; } - - return 0; + return NO_ERROR; } -enum cn_handler_type { - CHT_MINOR, - CHT_CONN, - CHT_CTOR, - /* CHT_RES, later */ -}; -struct cn_handler_struct { - enum cn_handler_type type; - union { - int (*minor_based)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*conn_based)(struct drbd_tconn *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int (*constructor)(struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - }; - int reply_body_size; -}; - -static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, - [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, - [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, - [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, - [ P_net_conf ] = { CHT_CONN, { .conn_based = &drbd_nl_net_conf }, 0 }, - [ P_disconnect ] = { CHT_CONN, { .conn_based = &drbd_nl_disconnect }, 0 }, - [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, - [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, - [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, - [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, - [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, - [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, - [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, - [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, - [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, - [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, - sizeof(struct syncer_conf_tag_len_struct) + - sizeof(struct disk_conf_tag_len_struct) + - sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, - sizeof(struct get_state_tag_len_struct) + - sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, - sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, - sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, - [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, - [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, - [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, - [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, - [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, -}; - -static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) +int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; - struct cn_handler_struct *cm; - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - struct drbd_conf *mdev; - struct drbd_tconn *tconn; - int retcode, rr; - int reply_size = sizeof(struct cn_msg) - + sizeof(struct drbd_nl_cfg_reply) - + sizeof(short int); - - if (!try_module_get(THIS_MODULE)) { - printk(KERN_ERR "drbd: try_module_get() failed!\n"); - return; - } - - if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) { - retcode = ERR_PERM; - goto fail; - } + enum drbd_ret_code retcode; - if (nlp->packet_type >= P_nl_after_last_packet || - nlp->packet_type == P_return_code_only) { - retcode = ERR_PACKET_NR; - goto fail; - } + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - cm = cnd_table + nlp->packet_type; + retcode = drbd_check_conn_name(adm_ctx.conn_name); + if (retcode != NO_ERROR) + goto out; - /* This may happen if packet number is 0: */ - if (cm->minor_based == NULL) { - retcode = ERR_PACKET_NR; - goto fail; + if (adm_ctx.tconn) { + retcode = ERR_INVALID_REQUEST; + drbd_msg_put_info("connection exists"); + goto out; } - reply_size += cm->reply_body_size; - - /* allocation not in the IO path, cqueue thread context */ - cn_reply = kzalloc(reply_size, GFP_KERNEL); - if (!cn_reply) { + if (!drbd_new_tconn(adm_ctx.conn_name)) retcode = ERR_NOMEM; - goto fail; - } - reply = (struct drbd_nl_cfg_reply *) cn_reply->data; - - reply->packet_type = - cm->reply_body_size ? nlp->packet_type : P_return_code_only; - reply->minor = nlp->drbd_minor; - reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ - /* reply->tag_list; might be modified by cm->function. */ - - retcode = ERR_MINOR_INVALID; - rr = 0; - switch (cm->type) { - case CHT_MINOR: - mdev = minor_to_mdev(nlp->drbd_minor); - if (!mdev) - goto fail; - rr = cm->minor_based(mdev, nlp, reply); - break; - case CHT_CONN: - tconn = conn_by_name(nlp->obj_name); - if (!tconn) { - retcode = ERR_CONN_NOT_KNOWN; - goto fail; - } - rr = cm->conn_based(tconn, nlp, reply); - break; - case CHT_CTOR: - rr = cm->constructor(nlp, reply); - break; - /* case CHT_RES: */ - } - - cn_reply->id = req->id; - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; - cn_reply->flags = 0; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); - - kfree(cn_reply); - module_put(THIS_MODULE); - return; - fail: - drbd_nl_send_reply(req, retcode); - module_put(THIS_MODULE); -} - -static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ - -static unsigned short * -__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, - unsigned short len, int nul_terminated) -{ - unsigned short l = tag_descriptions[tag_number(tag)].max_len; - len = (len < l) ? len : l; - put_unaligned(tag, tl++); - put_unaligned(len, tl++); - memcpy(tl, data, len); - tl = (unsigned short*)((char*)tl + len); - if (nul_terminated) - *((char*)tl - 1) = 0; - return tl; -} - -static unsigned short * -tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) -{ - return __tl_add_blob(tl, tag, data, len, 0); -} - -static unsigned short * -tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) -{ - return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); -} - -static unsigned short * -tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) -{ - put_unaligned(tag, tl++); - switch(tag_type(tag)) { - case TT_INTEGER: - put_unaligned(sizeof(int), tl++); - put_unaligned(*(int *)val, (int *)tl); - tl = (unsigned short*)((char*)tl+sizeof(int)); - break; - case TT_INT64: - put_unaligned(sizeof(u64), tl++); - put_unaligned(*(u64 *)val, (u64 *)tl); - tl = (unsigned short*)((char*)tl+sizeof(u64)); - break; - default: - /* someone did something stupid. */ - ; - } - return tl; -} - -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct get_state_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = get_state_to_tags((struct get_state *)&state, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. */ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_get_state; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); -} - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct call_helper_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = tl_add_str(tl, T_helper, helper_name); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. */ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_call_helper; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, - const char *seen_hash, const char *calc_hash, - const struct drbd_peer_request *peer_req) +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) { - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - unsigned short *tl; - struct page *page; - unsigned len; + struct drbd_genlmsghdr *dh = info->userhdr; + enum drbd_ret_code retcode; - if (!peer_req) - return; - if (!reason || !reason[0]) - return; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - /* apparently we have to memcpy twice, first to prepare the data for the - * struct cn_msg, then within cn_netlink_send from the cn_msg to the - * netlink skb. */ - /* receiver thread context, which is not in the writeout path (of this node), - * but may be in the writeout path of the _other_ node. - * GFP_NOIO to avoid potential "distributed deadlock". */ - cn_reply = kzalloc( - sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct dump_ee_tag_len_struct)+ - sizeof(short int), - GFP_NOIO); - - if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, " - "sector %llu, size %u\n", - (unsigned long long)peer_req->i.sector, - peer_req->i.size); - return; + /* FIXME drop minor_count parameter, limit to MINORMASK */ + if (dh->minor >= minor_count) { + drbd_msg_put_info("requested minor out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - - reply = (struct drbd_nl_cfg_reply*)cn_reply->data; - tl = reply->tag_list; - - tl = tl_add_str(tl, T_dump_ee_reason, reason); - tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); - tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector); - tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id); - - /* dump the first 32k */ - len = min_t(unsigned, peer_req->i.size, 32 << 10); - put_unaligned(T_ee_data, tl++); - put_unaligned(len, tl++); - - page = peer_req->pages; - page_chain_for_each(page) { - void *d = kmap_atomic(page, KM_USER0); - unsigned l = min_t(unsigned, len, PAGE_SIZE); - memcpy(tl, d, l); - kunmap_atomic(d, KM_USER0); - tl = (unsigned short*)((char*)tl + l); - len -= l; - if (len == 0) - break; + /* FIXME we need a define here */ + if (adm_ctx.volume >= 256) { + drbd_msg_put_info("requested volume id out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; // not used here. - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char*)tl - (char*)reply->tag_list); - cn_reply->flags = 0; - reply->packet_type = P_dump_ee; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - kfree(cn_reply); + retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_sync_progress(struct drbd_conf *mdev) +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct sync_progress_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - unsigned long rs_left; - unsigned int res; - - /* no local ref, no bitmap, no syncer progress, no broadcast. */ - if (!get_ldev(mdev)) - return; - drbd_get_syncer_progress(mdev, &rs_left, &res); - put_ldev(mdev); - - tl = tl_add_int(tl, T_sync_progress, &res); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_inc_return(&drbd_nl_seq); - cn_reply->ack = 0; /* not used here. */ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; - reply->packet_type = P_sync_progress; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + mdev = adm_ctx.mdev; + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + retcode = NO_ERROR; + } else + retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); + return 0; } -int __init drbd_nl_init(void) +int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) { - static struct cb_id cn_id_drbd; - int err, try=10; - - cn_id_drbd.val = CN_VAL_DRBD; - do { - cn_id_drbd.idx = cn_idx; - err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); - if (!err) - break; - cn_idx = (cn_idx + CN_IDX_STEP); - } while (try--); + enum drbd_ret_code retcode; - if (err) { - printk(KERN_ERR "drbd: cn_drbd failed to register\n"); - return err; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; + } else { + retcode = ERR_CONN_IN_USE; } +out: + drbd_adm_finish(info, retcode); return 0; } -void drbd_nl_cleanup(void) +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) { - static struct cb_id cn_id_drbd; - - cn_id_drbd.idx = cn_idx; - cn_id_drbd.val = CN_VAL_DRBD; + static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ + struct sk_buff *msg; + struct drbd_genlmsghdr *d_out; + unsigned seq; + int err = -ENOMEM; + + seq = atomic_inc_return(&drbd_genl_seq); + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); + if (!msg) + goto failed; + + err = -EMSGSIZE; + d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); + if (!d_out) /* cannot happen, but anyways. */ + goto nla_put_failure; + d_out->minor = mdev_to_minor(mdev); + d_out->ret_code = 0; + + if (nla_put_status_info(msg, mdev, sib)) + goto nla_put_failure; + genlmsg_end(msg, d_out); + err = drbd_genl_multicast_events(msg, 0); + /* msg has been consumed or freed in netlink_broadcast() */ + if (err && err != -ESRCH) + goto failed; - cn_del_callback(&cn_id_drbd); -} + return; -void drbd_nl_send_reply(struct cn_msg *req, int ret_code) -{ - char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - int rr; - - memset(buffer, 0, sizeof(buffer)); - cn_reply->id = req->id; - - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply); - cn_reply->flags = 0; - - reply->packet_type = P_return_code_only; - reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; - reply->ret_code = ret_code; - - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); +nla_put_failure: + nlmsg_free(msg); +failed: + dev_err(DEV, "Error %d while broadcasting event. " + "Event seq:%u sib_reason:%u\n", + err, seq, sib->sib_reason); } - diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index ffee90d6d374..a280bc238acd 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -970,6 +970,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, enum drbd_fencing_p fp; enum drbd_req_event what = NOTHING; union drbd_state nsm = (union drbd_state){ .i = -1 }; + struct sib_info sib; + + sib.sib_reason = SIB_STATE_CHANGE; + sib.os = os; + sib.ns = ns; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); @@ -984,7 +989,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); + drbd_bcast_event(mdev, &sib); if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e192167e6145..d28fdd8fcd49 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,7 +51,6 @@ #endif - extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 @@ -159,6 +158,7 @@ enum drbd_ret_code { ERR_CONN_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, + ERR_INVALID_REQUEST = 162, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -349,37 +349,4 @@ enum drbd_timeout_flag { #define DRBD_MD_INDEX_FLEX_EXT -2 #define DRBD_MD_INDEX_FLEX_INT -3 -/* Start of the new netlink/connector stuff */ - -enum drbd_ncr_flags { - DRBD_NL_CREATE_DEVICE = 0x01, - DRBD_NL_SET_DEFAULTS = 0x02, -}; -#define DRBD_NL_OBJ_NAME_LEN 32 - - -/* For searching a vacant cn_idx value */ -#define CN_IDX_STEP 6977 - -struct drbd_nl_cfg_req { - int packet_type; - union { - struct { - unsigned int drbd_minor; - enum drbd_ncr_flags flags; - }; - struct { - char obj_name[DRBD_NL_OBJ_NAME_LEN]; - }; - }; - unsigned short tag_list[]; -}; - -struct drbd_nl_cfg_reply { - int packet_type; - unsigned int minor; - int ret_code; /* enum ret_code or set_st_err_t */ - unsigned short tag_list[]; /* only used with get_* calls */ -}; - #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 8a86f659d363..c8c67239f616 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -95,7 +95,7 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #endif #endif -#if 1 +#ifdef GENL_MAGIC_DEBUG static void dprint_field(const char *dir, int nla_type, const char *name, void *valp) { -- cgit v1.2.3 From 73d901b74f1070c8a664349b564ba6f8bc8ab283 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:38:56 +0100 Subject: drbd: remove now unused connector related files Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_nl.h | 172 ----------------------------------------- include/linux/drbd_tag_magic.h | 84 -------------------- 2 files changed, 256 deletions(-) delete mode 100644 include/linux/drbd_nl.h delete mode 100644 include/linux/drbd_tag_magic.h (limited to 'include/linux') diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h deleted file mode 100644 index 1216c7a432c5..000000000000 --- a/include/linux/drbd_nl.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - PAKET( name, - TYPE ( pn, pr, member ) - ... - ) - - You may never reissue one of the pn arguments -*/ - -#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) -#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" -#endif - -NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, primary_force) -) - -NL_PACKET(secondary, 2, ) - -NL_PACKET(disk_conf, 3, - NL_INT64( 2, T_MAY_IGNORE, disk_size) - NL_STRING( 3, T_MANDATORY, backing_dev, 128) - NL_STRING( 4, T_MANDATORY, meta_dev, 128) - NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) - NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) - NL_INTEGER( 7, T_MAY_IGNORE, fencing) - NL_BIT( 37, T_MAY_IGNORE, use_bmbv) - NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) - NL_BIT( 54, T_MAY_IGNORE, no_md_flush) - /* 55 max_bio_size was available in 8.2.6rc2 */ - NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) - NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) - NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) -) - -NL_PACKET(detach, 4, ) - -NL_PACKET(net_conf, 5, - NL_STRING( 8, T_MANDATORY, my_addr, 128) - NL_STRING( 9, T_MANDATORY, peer_addr, 128) - NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) - NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) - NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) - NL_INTEGER( 14, T_MAY_IGNORE, timeout) - NL_INTEGER( 15, T_MANDATORY, wire_protocol) - NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) - NL_INTEGER( 17, T_MAY_IGNORE, ping_int) - NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) - NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) - NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) - NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) - NL_INTEGER( 22, T_MAY_IGNORE, ko_count) - NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) - NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) - NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) - NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) - NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) - NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) - NL_INTEGER( 81, T_MAY_IGNORE, on_congestion) - NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) - NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) - /* 59 addr_family was available in GIT, never released */ - NL_BIT( 60, T_MANDATORY, mind_af) - NL_BIT( 27, T_MAY_IGNORE, want_lose) - NL_BIT( 28, T_MAY_IGNORE, two_primaries) - NL_BIT( 41, T_MAY_IGNORE, always_asbp) - NL_BIT( 61, T_MAY_IGNORE, no_cork) - NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) - NL_BIT( 70, T_MANDATORY, dry_run) -) - -NL_PACKET(disconnect, 6, - NL_BIT( 84, T_MAY_IGNORE, force) -) - -NL_PACKET(resize, 7, - NL_INT64( 29, T_MAY_IGNORE, resize_size) - NL_BIT( 68, T_MAY_IGNORE, resize_force) - NL_BIT( 69, T_MANDATORY, no_resync) -) - -NL_PACKET(syncer_conf, 8, - NL_INTEGER( 30, T_MAY_IGNORE, rate) - NL_INTEGER( 31, T_MAY_IGNORE, after) - NL_INTEGER( 32, T_MAY_IGNORE, al_extents) -/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) - * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) - * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) - * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) - * feature will be reimplemented differently with 8.3.9 */ - NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) - NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) - NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle) - NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) - NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) - NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) - NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) - NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) - NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) -) - -NL_PACKET(invalidate, 9, ) -NL_PACKET(invalidate_peer, 10, ) -NL_PACKET(pause_sync, 11, ) -NL_PACKET(resume_sync, 12, ) -NL_PACKET(suspend_io, 13, ) -NL_PACKET(resume_io, 14, ) -NL_PACKET(outdate, 15, ) -NL_PACKET(get_config, 16, ) -NL_PACKET(get_state, 17, - NL_INTEGER( 33, T_MAY_IGNORE, state_i) -) - -NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) - NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) -) - -NL_PACKET(get_timeout_flag, 19, - NL_BIT( 36, T_MAY_IGNORE, use_degraded) -) - -NL_PACKET(call_helper, 20, - NL_STRING( 38, T_MAY_IGNORE, helper, 32) -) - -/* Tag nr 42 already allocated in drbd-8.1 development. */ - -NL_PACKET(sync_progress, 23, - NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) -) - -NL_PACKET(dump_ee, 24, - NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) - NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) - NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) - NL_INT64( 48, T_MAY_IGNORE, ee_sector) - NL_INT64( 49, T_MAY_IGNORE, ee_block_id) - NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) -) - -NL_PACKET(start_ov, 25, - NL_INT64( 66, T_MAY_IGNORE, start_sector) -) - -NL_PACKET(new_c_uuid, 26, - NL_BIT( 63, T_MANDATORY, clear_bm) -) - -#ifdef NL_RESPONSE -NL_RESPONSE(return_code_only, 27) -#endif - -NL_PACKET(new_connection, 28, /* CHT_CTOR */ - NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) -) - -NL_PACKET(new_minor, 29, /* CHT_CONN */ - NL_INTEGER( 86, T_MANDATORY, minor) - NL_INTEGER( 87, T_MANDATORY, vol_nr) -) - -NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ -NL_PACKET(del_connection, 31, ) /* CHT_CONN */ - -#undef NL_PACKET -#undef NL_INTEGER -#undef NL_INT64 -#undef NL_BIT -#undef NL_STRING -#undef NL_RESPONSE diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h deleted file mode 100644 index 069543190516..000000000000 --- a/include/linux/drbd_tag_magic.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef DRBD_TAG_MAGIC_H -#define DRBD_TAG_MAGIC_H - -#define TT_END 0 -#define TT_REMOVED 0xE000 - -/* declare packet_type enums */ -enum packet_types { -#define NL_PACKET(name, number, fields) P_ ## name = number, -#define NL_RESPONSE(name, number) P_ ## name = number, -#define NL_INTEGER(pn, pr, member) -#define NL_INT64(pn, pr, member) -#define NL_BIT(pn, pr, member) -#define NL_STRING(pn, pr, member, len) -#include "drbd_nl.h" - P_nl_after_last_packet, -}; - -/* These struct are used to deduce the size of the tag lists: */ -#define NL_PACKET(name, number, fields) \ - struct name ## _tag_len_struct { fields }; -#define NL_INTEGER(pn, pr, member) \ - int member; int tag_and_len ## member; -#define NL_INT64(pn, pr, member) \ - __u64 member; int tag_and_len ## member; -#define NL_BIT(pn, pr, member) \ - unsigned char member:1; int tag_and_len ## member; -#define NL_STRING(pn, pr, member, len) \ - unsigned char member[len]; int member ## _len; \ - int tag_and_len ## member; -#include "linux/drbd_nl.h" - -/* declare tag-list-sizes */ -static const int tag_list_sizes[] = { -#define NL_PACKET(name, number, fields) 2 fields , -#define NL_INTEGER(pn, pr, member) + 4 + 4 -#define NL_INT64(pn, pr, member) + 4 + 8 -#define NL_BIT(pn, pr, member) + 4 + 1 -#define NL_STRING(pn, pr, member, len) + 4 + (len) -#include "drbd_nl.h" -}; - -/* The two highest bits are used for the tag type */ -#define TT_MASK 0xC000 -#define TT_INTEGER 0x0000 -#define TT_INT64 0x4000 -#define TT_BIT 0x8000 -#define TT_STRING 0xC000 -/* The next bit indicates if processing of the tag is mandatory */ -#define T_MANDATORY 0x2000 -#define T_MAY_IGNORE 0x0000 -#define TN_MASK 0x1fff -/* The remaining 13 bits are used to enumerate the tags */ - -#define tag_type(T) ((T) & TT_MASK) -#define tag_number(T) ((T) & TN_MASK) - -/* declare tag enums */ -#define NL_PACKET(name, number, fields) fields -enum drbd_tags { -#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , -#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , -#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , -#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , -#include "drbd_nl.h" -}; - -struct tag { - const char *name; - int type_n_flags; - int max_len; -}; - -/* declare tag names */ -#define NL_PACKET(name, number, fields) fields -static const struct tag tag_descriptions[] = { -#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, -#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, -#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, -#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) }, -#include "drbd_nl.h" -}; - -#endif -- cgit v1.2.3 From 85f75dd7630436b0aa46a6393099c0f23121f5f0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 15 Mar 2011 16:26:37 +0100 Subject: drbd: introduce in-kernel "down" command This greatly simplifies deconfiguration of whole resources. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 - drivers/block/drbd/drbd_nl.c | 203 ++++++++++++++++++++++++++++++----------- include/linux/drbd_genl.h | 2 + 3 files changed, 154 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 113c7b465384..40b7b93def75 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2303,9 +2303,7 @@ fail: void drbd_free_tconn(struct drbd_tconn *tconn) { - mutex_lock(&drbd_cfg_mutex); list_del(&tconn->all_tconn); - mutex_unlock(&drbd_cfg_mutex); idr_destroy(&tconn->volumes); free_cpumask_var(tconn->cpu_mask); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f965dfe4b5ff..d952e877f8d5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -49,6 +49,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); @@ -1416,6 +1417,18 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } +static int adm_detach(struct drbd_conf *mdev) +{ + enum drbd_ret_code retcode; + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); + wait_event(mdev->misc_wait, + mdev->state.disk != D_DISKLESS || + !atomic_read(&mdev->local_cnt)); + drbd_resume_io(mdev); + return retcode; +} + /* Detaching the disk is a process in multiple stages. First we need to lock * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. * Then we transition to D_DISKLESS, and wait for put_ldev() to return all @@ -1423,7 +1436,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * Only then we have finally detached. */ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -1432,13 +1444,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - wait_event(mdev->misc_wait, - mdev->state.disk != D_DISKLESS || - !atomic_read(&mdev->local_cnt)); - drbd_resume_io(mdev); + retcode = adm_detach(adm_ctx.mdev); out: drbd_adm_finish(info, retcode); return 0; @@ -1680,10 +1686,49 @@ out: return 0; } +static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) +{ + enum drbd_state_rv rv; + if (force) { + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate >= C_WF_CONNECTION) + _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + spin_unlock_irq(&tconn->req_lock); + return SS_SUCCESS; + } + + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); + + switch (rv) { + case SS_NOTHING_TO_DO: + case SS_ALREADY_STANDALONE: + return SS_SUCCESS; + case SS_PRIMARY_NOP: + /* Our state checking code wants to see the peer outdated. */ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); + break; + case SS_CW_FAILED_BY_PEER: + /* The peer probably wants to see us outdated. */ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); + if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + rv = SS_SUCCESS; + } + break; + default:; + /* no special handling necessary */ + } + + return rv; +} + int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) { struct disconnect_parms parms; struct drbd_tconn *tconn; + enum drbd_state_rv rv; enum drbd_ret_code retcode; int err; @@ -1704,35 +1749,8 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) } } - if (parms.force_disconnect) { - spin_lock_irq(&tconn->req_lock); - if (tconn->cstate >= C_WF_CONNECTION) - _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - spin_unlock_irq(&tconn->req_lock); - goto done; - } - - retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); - - if (retcode == SS_NOTHING_TO_DO) - goto done; - else if (retcode == SS_ALREADY_STANDALONE) - goto done; - else if (retcode == SS_PRIMARY_NOP) { - /* Our state checking code wants to see the peer outdated. */ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED), CS_VERBOSE); - } else if (retcode == SS_CW_FAILED_BY_PEER) { - /* The peer probably wants to see us outdated. */ - retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), 0); - if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); - retcode = SS_SUCCESS; - } - } - - if (retcode < SS_SUCCESS) + rv = conn_try_disconnect(tconn, parms.force_disconnect); + if (rv < SS_SUCCESS) goto fail; if (wait_event_interruptible(tconn->ping_wait, @@ -1743,7 +1761,6 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) goto fail; } - done: retcode = NO_ERROR; fail: drbd_adm_finish(info, retcode); @@ -2644,9 +2661,21 @@ out: return 0; } +static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) +{ + if (mdev->state.disk == D_DISKLESS && + /* no need to be mdev->state.conn == C_STANDALONE && + * we may want to delete a minor from a live replication group. + */ + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + return NO_ERROR; + } else + return ERR_MINOR_CONFIGURED; +} + int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); @@ -2655,19 +2684,89 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mdev = adm_ctx.mdev; - if (mdev->state.disk == D_DISKLESS && - /* no need to be mdev->state.conn == C_STANDALONE && - * we may want to delete a minor from a live replication group. - */ - mdev->state.role == R_SECONDARY) { - drbd_delete_device(mdev_to_minor(mdev)); - retcode = NO_ERROR; - /* if this was the last volume of this connection, - * this will terminate all threads */ + mutex_lock(&drbd_cfg_mutex); + retcode = adm_delete_minor(adm_ctx.mdev); + mutex_unlock(&drbd_cfg_mutex); + /* if this was the last volume of this connection, + * this will terminate all threads */ + if (retcode == NO_ERROR) conn_reconfig_done(adm_ctx.tconn); - } else - retcode = ERR_MINOR_CONFIGURED; +out: + drbd_adm_finish(info, retcode); + return 0; +} + +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + enum drbd_state_rv rv; + struct drbd_conf *mdev; + unsigned i; + + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (!adm_ctx.tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto out; + } + + mutex_lock(&drbd_cfg_mutex); + /* demote */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = drbd_set_role(mdev, R_SECONDARY, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to demote"); + goto out_unlock; + } + } + + /* disconnect */ + rv = conn_try_disconnect(adm_ctx.tconn, 0); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! */ + drbd_msg_put_info("failed to disconnect"); + goto out_unlock; + } + + /* detach */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + rv = adm_detach(mdev); + if (rv < SS_SUCCESS) { + retcode = rv; /* enum type mismatch! */ + drbd_msg_put_info("failed to detach"); + goto out_unlock; + } + } + + /* delete volumes */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = adm_delete_minor(mdev); + if (retcode != NO_ERROR) { + /* "can not happen" */ + drbd_msg_put_info("failed to delete volume"); + goto out_unlock; + } + } + + /* stop all threads */ + conn_reconfig_done(adm_ctx.tconn); + + /* delete connection */ + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + drbd_free_tconn(adm_ctx.tconn); + retcode = NO_ERROR; + } else { + /* "can not happen" */ + retcode = ERR_CONN_IN_USE; + drbd_msg_put_info("failed to delete connection"); + goto out_unlock; + } +out_unlock: + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); return 0; @@ -2683,12 +2782,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; + mutex_lock(&drbd_cfg_mutex); if (conn_lowest_minor(adm_ctx.tconn) < 0) { drbd_free_tconn(adm_ctx.tconn); retcode = NO_ERROR; } else { retcode = ERR_CONN_IN_USE; } + mutex_unlock(&drbd_cfg_mutex); out: drbd_adm_finish(info, retcode); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 84e16848f7a1..a07d69279b1a 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -347,3 +347,5 @@ GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) -- cgit v1.2.3 From 047e95e259e81d7b97eca10cda0aa93082531ac1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Mar 2011 14:43:36 +0100 Subject: drbd: Allow volumes to become primary only on one side Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 21 ++++++++++++++++++--- drivers/block/drbd/drbd_strings.c | 1 + include/linux/drbd.h | 3 ++- 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 886b996ec7b3..11685658659e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -329,6 +329,18 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } +static bool vol_has_primary_peer(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor; + + idr_for_each_entry(&tconn->volumes, mdev, minor) { + if (mdev->state.peer == R_PRIMARY) + return true; + } + return false; +} + /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -349,9 +361,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; + if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { + if (ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + else if (vol_has_primary_peer(mdev->tconn)) + rv = SS_O_VOL_PEER_PRI; + } put_net_conf(mdev->tconn); } diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index c44a2a602772..9a664bd27404 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; const char *drbd_conn_str(enum drbd_conns s) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28fdd8fcd49..9cdb888607ae 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -300,7 +300,8 @@ enum drbd_state_rv { SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ - SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ + SS_O_VOL_PEER_PRI = -20, + SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ }; /* from drbd_strings.c */ -- cgit v1.2.3 From d1fb57435c108b8dd66d7f47b4c60c1798dcae4c Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Tue, 16 Oct 2012 12:55:39 +0530 Subject: input: TSC: ti_tscadc: Add Step configuration as platform data There are 16 programmable Step Configuration registers which are used by the sequencer. Program the Steps in order to configure a channel input to be sampled. If the same step is applied several times, the coordinate values read are more accurate. Hence we provide the user an option of how many steps should be configured. For ex: If this value is assigned as 4, This means that 4 steps are applied to read x co-ordinate and 4 steps to read y co-ordinate. Furtheron the interrupt handler already holds code to use delta filter and report the best value out of these values to the input sub-system. Signed-off-by: Patil, Rachna Signed-off-by: Samuel Ortiz --- drivers/input/touchscreen/ti_tscadc.c | 25 +++++++++++++------------ include/linux/input/ti_tscadc.h | 6 ++++++ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c index d198cab61a69..c1bd8e5c132c 100644 --- a/drivers/input/touchscreen/ti_tscadc.c +++ b/drivers/input/touchscreen/ti_tscadc.c @@ -41,10 +41,6 @@ #define REG_CHARGEDELAY 0x060 #define REG_STEPCONFIG(n) (0x64 + ((n - 1) * 8)) #define REG_STEPDELAY(n) (0x68 + ((n - 1) * 8)) -#define REG_STEPCONFIG13 0x0C4 -#define REG_STEPDELAY13 0x0C8 -#define REG_STEPCONFIG14 0x0CC -#define REG_STEPDELAY14 0x0D0 #define REG_FIFO0CNT 0xE4 #define REG_FIFO1THR 0xF4 #define REG_FIFO0 0x100 @@ -134,6 +130,7 @@ struct tscadc { unsigned int wires; unsigned int x_plate_resistance; bool pen_down; + int steps_to_configure; }; static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg) @@ -150,9 +147,10 @@ static void tscadc_writel(struct tscadc *tsc, unsigned int reg, static void tscadc_step_config(struct tscadc *ts_dev) { unsigned int config; - int i; + int i, total_steps; /* Configure the Step registers */ + total_steps = 2 * ts_dev->steps_to_configure; config = STEPCONFIG_MODE_HWSYNC | STEPCONFIG_AVG_16 | STEPCONFIG_XPP; @@ -170,7 +168,7 @@ static void tscadc_step_config(struct tscadc *ts_dev) break; } - for (i = 1; i < 7; i++) { + for (i = 1; i <= ts_dev->steps_to_configure; i++) { tscadc_writel(ts_dev, REG_STEPCONFIG(i), config); tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); } @@ -192,7 +190,7 @@ static void tscadc_step_config(struct tscadc *ts_dev) break; } - for (i = 7; i < 13; i++) { + for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) { tscadc_writel(ts_dev, REG_STEPCONFIG(i), config); tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); } @@ -211,12 +209,14 @@ static void tscadc_step_config(struct tscadc *ts_dev) config = STEPCONFIG_MODE_HWSYNC | STEPCONFIG_AVG_16 | STEPCONFIG_YPP | STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM; - tscadc_writel(ts_dev, REG_STEPCONFIG13, config); - tscadc_writel(ts_dev, REG_STEPDELAY13, STEPCONFIG_OPENDLY); + tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config); + tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1), + STEPCONFIG_OPENDLY); config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1; - tscadc_writel(ts_dev, REG_STEPCONFIG14, config); - tscadc_writel(ts_dev, REG_STEPDELAY14, STEPCONFIG_OPENDLY); + tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config); + tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2), + STEPCONFIG_OPENDLY); tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB); } @@ -379,6 +379,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev) ts_dev->irq = irq; ts_dev->wires = pdata->wires; ts_dev->x_plate_resistance = pdata->x_plate_resistance; + ts_dev->steps_to_configure = pdata->steps_to_configure; res = request_mem_region(res->start, resource_size(res), pdev->name); if (!res) { @@ -447,7 +448,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev) tscadc_idle_config(ts_dev); tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO1THRES); tscadc_step_config(ts_dev); - tscadc_writel(ts_dev, REG_FIFO1THR, 6); + tscadc_writel(ts_dev, REG_FIFO1THR, ts_dev->steps_to_configure); ctrl |= CNTRLREG_TSCSSENB; tscadc_writel(ts_dev, REG_CTRL, ctrl); diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h index b10a527a92a4..ad442a3eb68b 100644 --- a/include/linux/input/ti_tscadc.h +++ b/include/linux/input/ti_tscadc.h @@ -7,11 +7,17 @@ * i.e. 4/5/8 wire touchscreen support * on the platform. * @x_plate_resistance: X plate resistance. + * @steps_to_configure: The sequencer supports a total of + * 16 programmable steps. + * A step configured to read a single + * co-ordinate value, can be applied + * more number of times for better results. */ struct tsc_data { int wires; int x_plate_resistance; + int steps_to_configure; }; #endif -- cgit v1.2.3 From 55c04de5176ea3eac6fdc469a6a063c5cb91ed7c Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Tue, 16 Oct 2012 12:55:42 +0530 Subject: input: TSC: ti_tscadc: Rename the existing touchscreen driver Make way for addition of MFD driver. The existing touchsreen driver is a MFD client. For better readability we rename the file to indicate its functionality as only touchscreen. Signed-off-by: Patil, Rachna Signed-off-by: Samuel Ortiz --- drivers/input/touchscreen/Kconfig | 4 +- drivers/input/touchscreen/Makefile | 2 +- drivers/input/touchscreen/ti_am335x_tsc.c | 522 ++++++++++++++++++++++++++++++ drivers/input/touchscreen/ti_tscadc.c | 522 ------------------------------ include/linux/input/ti_am335x_tsc.h | 23 ++ include/linux/input/ti_tscadc.h | 23 -- 6 files changed, 548 insertions(+), 548 deletions(-) create mode 100644 drivers/input/touchscreen/ti_am335x_tsc.c delete mode 100644 drivers/input/touchscreen/ti_tscadc.c create mode 100644 include/linux/input/ti_am335x_tsc.h delete mode 100644 include/linux/input/ti_tscadc.h (limited to 'include/linux') diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index f7668b24c378..d31dc5faeaaf 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -529,7 +529,7 @@ config TOUCHSCREEN_TOUCHWIN To compile this driver as a module, choose M here: the module will be called touchwin. -config TOUCHSCREEN_TI_TSCADC +config TOUCHSCREEN_TI_AM335X_TSC tristate "TI Touchscreen Interface" depends on ARCH_OMAP2PLUS help @@ -539,7 +539,7 @@ config TOUCHSCREEN_TI_TSCADC If unsure, say N. To compile this driver as a module, choose M here: the - module will be called ti_tscadc. + module will be called ti_am335x_tsc. config TOUCHSCREEN_ATMEL_TSADCC tristate "Atmel Touchscreen Interface" diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 178eb128d90f..7c4c78ebe49e 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_TOUCHSCREEN_PIXCIR) += pixcir_i2c_ts.o obj-$(CONFIG_TOUCHSCREEN_S3C2410) += s3c2410_ts.o obj-$(CONFIG_TOUCHSCREEN_ST1232) += st1232.o obj-$(CONFIG_TOUCHSCREEN_STMPE) += stmpe-ts.o -obj-$(CONFIG_TOUCHSCREEN_TI_TSCADC) += ti_tscadc.o +obj-$(CONFIG_TOUCHSCREEN_TI_AM335X_TSC) += ti_am335x_tsc.o obj-$(CONFIG_TOUCHSCREEN_TNETV107X) += tnetv107x-ts.o obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213) += touchit213.o obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT) += touchright.o diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c new file mode 100644 index 000000000000..462950acb97b --- /dev/null +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -0,0 +1,522 @@ +/* + * TI Touch Screen driver + * + * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REG_RAWIRQSTATUS 0x024 +#define REG_IRQSTATUS 0x028 +#define REG_IRQENABLE 0x02C +#define REG_IRQWAKEUP 0x034 +#define REG_CTRL 0x040 +#define REG_ADCFSM 0x044 +#define REG_CLKDIV 0x04C +#define REG_SE 0x054 +#define REG_IDLECONFIG 0x058 +#define REG_CHARGECONFIG 0x05C +#define REG_CHARGEDELAY 0x060 +#define REG_STEPCONFIG(n) (0x64 + ((n - 1) * 8)) +#define REG_STEPDELAY(n) (0x68 + ((n - 1) * 8)) +#define REG_FIFO0CNT 0xE4 +#define REG_FIFO0THR 0xE8 +#define REG_FIFO1THR 0xF4 +#define REG_FIFO0 0x100 +#define REG_FIFO1 0x200 + +/* Register Bitfields */ +#define IRQWKUP_ENB BIT(0) + +/* Step Enable */ +#define STEPENB_MASK (0x1FFFF << 0) +#define STEPENB(val) (val << 0) +#define STPENB_STEPENB STEPENB(0x7FFF) + +/* IRQ enable */ +#define IRQENB_FIFO0THRES BIT(2) +#define IRQENB_FIFO1THRES BIT(5) +#define IRQENB_PENUP BIT(9) + +/* Step Configuration */ +#define STEPCONFIG_MODE_MASK (3 << 0) +#define STEPCONFIG_MODE(val) (val << 0) +#define STEPCONFIG_MODE_HWSYNC STEPCONFIG_MODE(2) +#define STEPCONFIG_AVG_MASK (7 << 2) +#define STEPCONFIG_AVG(val) (val << 2) +#define STEPCONFIG_AVG_16 STEPCONFIG_AVG(4) +#define STEPCONFIG_XPP BIT(5) +#define STEPCONFIG_XNN BIT(6) +#define STEPCONFIG_YPP BIT(7) +#define STEPCONFIG_YNN BIT(8) +#define STEPCONFIG_XNP BIT(9) +#define STEPCONFIG_YPN BIT(10) +#define STEPCONFIG_INM_MASK (0xF << 15) +#define STEPCONFIG_INM(val) (val << 15) +#define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) +#define STEPCONFIG_INP_MASK (0xF << 19) +#define STEPCONFIG_INP(val) (val << 19) +#define STEPCONFIG_INP_AN2 STEPCONFIG_INP(2) +#define STEPCONFIG_INP_AN3 STEPCONFIG_INP(3) +#define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) +#define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) +#define STEPCONFIG_FIFO1 BIT(26) + +/* Delay register */ +#define STEPDELAY_OPEN_MASK (0x3FFFF << 0) +#define STEPDELAY_OPEN(val) (val << 0) +#define STEPCONFIG_OPENDLY STEPDELAY_OPEN(0x098) + +/* Charge Config */ +#define STEPCHARGE_RFP_MASK (7 << 12) +#define STEPCHARGE_RFP(val) (val << 12) +#define STEPCHARGE_RFP_XPUL STEPCHARGE_RFP(1) +#define STEPCHARGE_INM_MASK (0xF << 15) +#define STEPCHARGE_INM(val) (val << 15) +#define STEPCHARGE_INM_AN1 STEPCHARGE_INM(1) +#define STEPCHARGE_INP_MASK (0xF << 19) +#define STEPCHARGE_INP(val) (val << 19) +#define STEPCHARGE_INP_AN1 STEPCHARGE_INP(1) +#define STEPCHARGE_RFM_MASK (3 << 23) +#define STEPCHARGE_RFM(val) (val << 23) +#define STEPCHARGE_RFM_XNUR STEPCHARGE_RFM(1) + +/* Charge delay */ +#define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) +#define CHARGEDLY_OPEN(val) (val << 0) +#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(1) + +/* Control register */ +#define CNTRLREG_TSCSSENB BIT(0) +#define CNTRLREG_STEPID BIT(1) +#define CNTRLREG_STEPCONFIGWRT BIT(2) +#define CNTRLREG_AFE_CTRL_MASK (3 << 5) +#define CNTRLREG_AFE_CTRL(val) (val << 5) +#define CNTRLREG_4WIRE CNTRLREG_AFE_CTRL(1) +#define CNTRLREG_5WIRE CNTRLREG_AFE_CTRL(2) +#define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) +#define CNTRLREG_TSCENB BIT(7) + +#define ADCFSM_STEPID 0x10 +#define SEQ_SETTLE 275 +#define ADC_CLK 3000000 +#define MAX_12BIT ((1 << 12) - 1) + +struct titsc { + struct input_dev *input; + struct clk *tsc_ick; + void __iomem *tsc_base; + unsigned int irq; + unsigned int wires; + unsigned int x_plate_resistance; + bool pen_down; + int steps_to_configure; +}; + +static unsigned int titsc_readl(struct titsc *ts, unsigned int reg) +{ + return readl(ts->tsc_base + reg); +} + +static void titsc_writel(struct titsc *tsc, unsigned int reg, + unsigned int val) +{ + writel(val, tsc->tsc_base + reg); +} + +static void titsc_step_config(struct titsc *ts_dev) +{ + unsigned int config; + int i, total_steps; + + /* Configure the Step registers */ + total_steps = 2 * ts_dev->steps_to_configure; + + config = STEPCONFIG_MODE_HWSYNC | + STEPCONFIG_AVG_16 | STEPCONFIG_XPP; + switch (ts_dev->wires) { + case 4: + config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN; + break; + case 5: + config |= STEPCONFIG_YNN | + STEPCONFIG_INP_AN4 | STEPCONFIG_XNN | + STEPCONFIG_YPP; + break; + case 8: + config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN; + break; + } + + for (i = 1; i <= ts_dev->steps_to_configure; i++) { + titsc_writel(ts_dev, REG_STEPCONFIG(i), config); + titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); + } + + config = 0; + config = STEPCONFIG_MODE_HWSYNC | + STEPCONFIG_AVG_16 | STEPCONFIG_YNN | + STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1; + switch (ts_dev->wires) { + case 4: + config |= STEPCONFIG_YPP; + break; + case 5: + config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 | + STEPCONFIG_XNP | STEPCONFIG_YPN; + break; + case 8: + config |= STEPCONFIG_YPP; + break; + } + + for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) { + titsc_writel(ts_dev, REG_STEPCONFIG(i), config); + titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); + } + + config = 0; + /* Charge step configuration */ + config = STEPCONFIG_XPP | STEPCONFIG_YNN | + STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR | + STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1; + + titsc_writel(ts_dev, REG_CHARGECONFIG, config); + titsc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY); + + config = 0; + /* Configure to calculate pressure */ + config = STEPCONFIG_MODE_HWSYNC | + STEPCONFIG_AVG_16 | STEPCONFIG_YPP | + STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM; + titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config); + titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 1), + STEPCONFIG_OPENDLY); + + config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1; + titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config); + titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2), + STEPCONFIG_OPENDLY); + + titsc_writel(ts_dev, REG_SE, STPENB_STEPENB); +} + +static void titsc_idle_config(struct titsc *ts_config) +{ + unsigned int idleconfig; + + idleconfig = STEPCONFIG_YNN | + STEPCONFIG_INM_ADCREFM | + STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM; + titsc_writel(ts_config, REG_IDLECONFIG, idleconfig); +} + +static void titsc_read_coordinates(struct titsc *ts_dev, + unsigned int *x, unsigned int *y) +{ + unsigned int fifocount = titsc_readl(ts_dev, REG_FIFO0CNT); + unsigned int prev_val_x = ~0, prev_val_y = ~0; + unsigned int prev_diff_x = ~0, prev_diff_y = ~0; + unsigned int read, diff; + unsigned int i; + + /* + * Delta filter is used to remove large variations in sampled + * values from ADC. The filter tries to predict where the next + * coordinate could be. This is done by taking a previous + * coordinate and subtracting it form current one. Further the + * algorithm compares the difference with that of a present value, + * if true the value is reported to the sub system. + */ + for (i = 0; i < fifocount - 1; i++) { + read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff; + diff = abs(read - prev_val_x); + if (diff < prev_diff_x) { + prev_diff_x = diff; + *x = read; + } + prev_val_x = read; + + read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff; + diff = abs(read - prev_val_y); + if (diff < prev_diff_y) { + prev_diff_y = diff; + *y = read; + } + prev_val_y = read; + } +} + +static irqreturn_t titsc_irq(int irq, void *dev) +{ + struct titsc *ts_dev = dev; + struct input_dev *input_dev = ts_dev->input; + unsigned int status, irqclr = 0; + unsigned int x = 0, y = 0; + unsigned int z1, z2, z; + unsigned int fsm; + + status = titsc_readl(ts_dev, REG_IRQSTATUS); + if (status & IRQENB_FIFO0THRES) { + titsc_read_coordinates(ts_dev, &x, &y); + + z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff; + z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff; + + if (ts_dev->pen_down && z1 != 0 && z2 != 0) { + /* + * Calculate pressure using formula + * Resistance(touch) = x plate resistance * + * x postion/4096 * ((z2 / z1) - 1) + */ + z = z2 - z1; + z *= x; + z *= ts_dev->x_plate_resistance; + z /= z1; + z = (z + 2047) >> 12; + + if (z <= MAX_12BIT) { + input_report_abs(input_dev, ABS_X, x); + input_report_abs(input_dev, ABS_Y, y); + input_report_abs(input_dev, ABS_PRESSURE, z); + input_report_key(input_dev, BTN_TOUCH, 1); + input_sync(input_dev); + } + } + irqclr |= IRQENB_FIFO0THRES; + } + + /* + * Time for sequencer to settle, to read + * correct state of the sequencer. + */ + udelay(SEQ_SETTLE); + + status = titsc_readl(ts_dev, REG_RAWIRQSTATUS); + if (status & IRQENB_PENUP) { + /* Pen up event */ + fsm = titsc_readl(ts_dev, REG_ADCFSM); + if (fsm == ADCFSM_STEPID) { + ts_dev->pen_down = false; + input_report_key(input_dev, BTN_TOUCH, 0); + input_report_abs(input_dev, ABS_PRESSURE, 0); + input_sync(input_dev); + } else { + ts_dev->pen_down = true; + } + irqclr |= IRQENB_PENUP; + } + + titsc_writel(ts_dev, REG_IRQSTATUS, irqclr); + + titsc_writel(ts_dev, REG_SE, STPENB_STEPENB); + return IRQ_HANDLED; +} + +/* + * The functions for inserting/removing driver as a module. + */ + +static int __devinit titsc_probe(struct platform_device *pdev) +{ + const struct tsc_data *pdata = pdev->dev.platform_data; + struct resource *res; + struct titsc *ts_dev; + struct input_dev *input_dev; + struct clk *clk; + int err; + int clk_value, ctrl, irq; + + if (!pdata) { + dev_err(&pdev->dev, "missing platform data.\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource defined.\n"); + return -EINVAL; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no irq ID is specified.\n"); + return -EINVAL; + } + + /* Allocate memory for device */ + ts_dev = kzalloc(sizeof(struct titsc), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!ts_dev || !input_dev) { + dev_err(&pdev->dev, "failed to allocate memory.\n"); + err = -ENOMEM; + goto err_free_mem; + } + + ts_dev->input = input_dev; + ts_dev->irq = irq; + ts_dev->wires = pdata->wires; + ts_dev->x_plate_resistance = pdata->x_plate_resistance; + ts_dev->steps_to_configure = pdata->steps_to_configure; + + res = request_mem_region(res->start, resource_size(res), pdev->name); + if (!res) { + dev_err(&pdev->dev, "failed to reserve registers.\n"); + err = -EBUSY; + goto err_free_mem; + } + + ts_dev->tsc_base = ioremap(res->start, resource_size(res)); + if (!ts_dev->tsc_base) { + dev_err(&pdev->dev, "failed to map registers.\n"); + err = -ENOMEM; + goto err_release_mem_region; + } + + err = request_irq(ts_dev->irq, titsc_irq, + 0, pdev->dev.driver->name, ts_dev); + if (err) { + dev_err(&pdev->dev, "failed to allocate irq.\n"); + goto err_unmap_regs; + } + + ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick"); + if (IS_ERR(ts_dev->tsc_ick)) { + dev_err(&pdev->dev, "failed to get TSC ick\n"); + goto err_free_irq; + } + clk_enable(ts_dev->tsc_ick); + + clk = clk_get(&pdev->dev, "adc_tsc_fck"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "failed to get TSC fck\n"); + err = PTR_ERR(clk); + goto err_disable_clk; + } + + clk_value = clk_get_rate(clk) / ADC_CLK; + clk_put(clk); + + if (clk_value < 7) { + dev_err(&pdev->dev, "clock input less than min clock requirement\n"); + goto err_disable_clk; + } + /* CLKDIV needs to be configured to the value minus 1 */ + titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1); + + /* Enable wake-up of the SoC using touchscreen */ + titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB); + + ctrl = CNTRLREG_STEPCONFIGWRT | + CNTRLREG_TSCENB | + CNTRLREG_STEPID; + switch (ts_dev->wires) { + case 4: + ctrl |= CNTRLREG_4WIRE; + break; + case 5: + ctrl |= CNTRLREG_5WIRE; + break; + case 8: + ctrl |= CNTRLREG_8WIRE; + break; + } + titsc_writel(ts_dev, REG_CTRL, ctrl); + + titsc_idle_config(ts_dev); + titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES); + titsc_step_config(ts_dev); + titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure); + + ctrl |= CNTRLREG_TSCSSENB; + titsc_writel(ts_dev, REG_CTRL, ctrl); + + input_dev->name = "ti-tsc-adc"; + input_dev->dev.parent = &pdev->dev; + + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); + + input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0); + input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0); + + /* register to the input system */ + err = input_register_device(input_dev); + if (err) + goto err_disable_clk; + + platform_set_drvdata(pdev, ts_dev); + return 0; + +err_disable_clk: + clk_disable(ts_dev->tsc_ick); + clk_put(ts_dev->tsc_ick); +err_free_irq: + free_irq(ts_dev->irq, ts_dev); +err_unmap_regs: + iounmap(ts_dev->tsc_base); +err_release_mem_region: + release_mem_region(res->start, resource_size(res)); +err_free_mem: + input_free_device(input_dev); + kfree(ts_dev); + return err; +} + +static int __devexit titsc_remove(struct platform_device *pdev) +{ + struct titsc *ts_dev = platform_get_drvdata(pdev); + struct resource *res; + + free_irq(ts_dev->irq, ts_dev); + + input_unregister_device(ts_dev->input); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iounmap(ts_dev->tsc_base); + release_mem_region(res->start, resource_size(res)); + + clk_disable(ts_dev->tsc_ick); + clk_put(ts_dev->tsc_ick); + + kfree(ts_dev); + + platform_set_drvdata(pdev, NULL); + return 0; +} + +static struct platform_driver ti_tsc_driver = { + .probe = titsc_probe, + .remove = __devexit_p(titsc_remove), + .driver = { + .name = "tsc", + .owner = THIS_MODULE, + }, +}; +module_platform_driver(ti_tsc_driver); + +MODULE_DESCRIPTION("TI touchscreen controller driver"); +MODULE_AUTHOR("Rachna Patil "); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c deleted file mode 100644 index ec0a442478a8..000000000000 --- a/drivers/input/touchscreen/ti_tscadc.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * TI Touch Screen driver - * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define REG_RAWIRQSTATUS 0x024 -#define REG_IRQSTATUS 0x028 -#define REG_IRQENABLE 0x02C -#define REG_IRQWAKEUP 0x034 -#define REG_CTRL 0x040 -#define REG_ADCFSM 0x044 -#define REG_CLKDIV 0x04C -#define REG_SE 0x054 -#define REG_IDLECONFIG 0x058 -#define REG_CHARGECONFIG 0x05C -#define REG_CHARGEDELAY 0x060 -#define REG_STEPCONFIG(n) (0x64 + ((n - 1) * 8)) -#define REG_STEPDELAY(n) (0x68 + ((n - 1) * 8)) -#define REG_FIFO0CNT 0xE4 -#define REG_FIFO0THR 0xE8 -#define REG_FIFO1THR 0xF4 -#define REG_FIFO0 0x100 -#define REG_FIFO1 0x200 - -/* Register Bitfields */ -#define IRQWKUP_ENB BIT(0) - -/* Step Enable */ -#define STEPENB_MASK (0x1FFFF << 0) -#define STEPENB(val) (val << 0) -#define STPENB_STEPENB STEPENB(0x7FFF) - -/* IRQ enable */ -#define IRQENB_FIFO0THRES BIT(2) -#define IRQENB_FIFO1THRES BIT(5) -#define IRQENB_PENUP BIT(9) - -/* Step Configuration */ -#define STEPCONFIG_MODE_MASK (3 << 0) -#define STEPCONFIG_MODE(val) (val << 0) -#define STEPCONFIG_MODE_HWSYNC STEPCONFIG_MODE(2) -#define STEPCONFIG_AVG_MASK (7 << 2) -#define STEPCONFIG_AVG(val) (val << 2) -#define STEPCONFIG_AVG_16 STEPCONFIG_AVG(4) -#define STEPCONFIG_XPP BIT(5) -#define STEPCONFIG_XNN BIT(6) -#define STEPCONFIG_YPP BIT(7) -#define STEPCONFIG_YNN BIT(8) -#define STEPCONFIG_XNP BIT(9) -#define STEPCONFIG_YPN BIT(10) -#define STEPCONFIG_INM_MASK (0xF << 15) -#define STEPCONFIG_INM(val) (val << 15) -#define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) -#define STEPCONFIG_INP_MASK (0xF << 19) -#define STEPCONFIG_INP(val) (val << 19) -#define STEPCONFIG_INP_AN2 STEPCONFIG_INP(2) -#define STEPCONFIG_INP_AN3 STEPCONFIG_INP(3) -#define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) -#define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) -#define STEPCONFIG_FIFO1 BIT(26) - -/* Delay register */ -#define STEPDELAY_OPEN_MASK (0x3FFFF << 0) -#define STEPDELAY_OPEN(val) (val << 0) -#define STEPCONFIG_OPENDLY STEPDELAY_OPEN(0x098) - -/* Charge Config */ -#define STEPCHARGE_RFP_MASK (7 << 12) -#define STEPCHARGE_RFP(val) (val << 12) -#define STEPCHARGE_RFP_XPUL STEPCHARGE_RFP(1) -#define STEPCHARGE_INM_MASK (0xF << 15) -#define STEPCHARGE_INM(val) (val << 15) -#define STEPCHARGE_INM_AN1 STEPCHARGE_INM(1) -#define STEPCHARGE_INP_MASK (0xF << 19) -#define STEPCHARGE_INP(val) (val << 19) -#define STEPCHARGE_INP_AN1 STEPCHARGE_INP(1) -#define STEPCHARGE_RFM_MASK (3 << 23) -#define STEPCHARGE_RFM(val) (val << 23) -#define STEPCHARGE_RFM_XNUR STEPCHARGE_RFM(1) - -/* Charge delay */ -#define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) -#define CHARGEDLY_OPEN(val) (val << 0) -#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(1) - -/* Control register */ -#define CNTRLREG_TSCSSENB BIT(0) -#define CNTRLREG_STEPID BIT(1) -#define CNTRLREG_STEPCONFIGWRT BIT(2) -#define CNTRLREG_AFE_CTRL_MASK (3 << 5) -#define CNTRLREG_AFE_CTRL(val) (val << 5) -#define CNTRLREG_4WIRE CNTRLREG_AFE_CTRL(1) -#define CNTRLREG_5WIRE CNTRLREG_AFE_CTRL(2) -#define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) -#define CNTRLREG_TSCENB BIT(7) - -#define ADCFSM_STEPID 0x10 -#define SEQ_SETTLE 275 -#define ADC_CLK 3000000 -#define MAX_12BIT ((1 << 12) - 1) - -struct tscadc { - struct input_dev *input; - struct clk *tsc_ick; - void __iomem *tsc_base; - unsigned int irq; - unsigned int wires; - unsigned int x_plate_resistance; - bool pen_down; - int steps_to_configure; -}; - -static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg) -{ - return readl(ts->tsc_base + reg); -} - -static void tscadc_writel(struct tscadc *tsc, unsigned int reg, - unsigned int val) -{ - writel(val, tsc->tsc_base + reg); -} - -static void tscadc_step_config(struct tscadc *ts_dev) -{ - unsigned int config; - int i, total_steps; - - /* Configure the Step registers */ - total_steps = 2 * ts_dev->steps_to_configure; - - config = STEPCONFIG_MODE_HWSYNC | - STEPCONFIG_AVG_16 | STEPCONFIG_XPP; - switch (ts_dev->wires) { - case 4: - config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN; - break; - case 5: - config |= STEPCONFIG_YNN | - STEPCONFIG_INP_AN4 | STEPCONFIG_XNN | - STEPCONFIG_YPP; - break; - case 8: - config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN; - break; - } - - for (i = 1; i <= ts_dev->steps_to_configure; i++) { - tscadc_writel(ts_dev, REG_STEPCONFIG(i), config); - tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); - } - - config = 0; - config = STEPCONFIG_MODE_HWSYNC | - STEPCONFIG_AVG_16 | STEPCONFIG_YNN | - STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1; - switch (ts_dev->wires) { - case 4: - config |= STEPCONFIG_YPP; - break; - case 5: - config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 | - STEPCONFIG_XNP | STEPCONFIG_YPN; - break; - case 8: - config |= STEPCONFIG_YPP; - break; - } - - for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) { - tscadc_writel(ts_dev, REG_STEPCONFIG(i), config); - tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY); - } - - config = 0; - /* Charge step configuration */ - config = STEPCONFIG_XPP | STEPCONFIG_YNN | - STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR | - STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1; - - tscadc_writel(ts_dev, REG_CHARGECONFIG, config); - tscadc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY); - - config = 0; - /* Configure to calculate pressure */ - config = STEPCONFIG_MODE_HWSYNC | - STEPCONFIG_AVG_16 | STEPCONFIG_YPP | - STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM; - tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config); - tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1), - STEPCONFIG_OPENDLY); - - config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1; - tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config); - tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2), - STEPCONFIG_OPENDLY); - - tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB); -} - -static void tscadc_idle_config(struct tscadc *ts_config) -{ - unsigned int idleconfig; - - idleconfig = STEPCONFIG_YNN | - STEPCONFIG_INM_ADCREFM | - STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM; - tscadc_writel(ts_config, REG_IDLECONFIG, idleconfig); -} - -static void tscadc_read_coordinates(struct tscadc *ts_dev, - unsigned int *x, unsigned int *y) -{ - unsigned int fifocount = tscadc_readl(ts_dev, REG_FIFO0CNT); - unsigned int prev_val_x = ~0, prev_val_y = ~0; - unsigned int prev_diff_x = ~0, prev_diff_y = ~0; - unsigned int read, diff; - unsigned int i; - - /* - * Delta filter is used to remove large variations in sampled - * values from ADC. The filter tries to predict where the next - * coordinate could be. This is done by taking a previous - * coordinate and subtracting it form current one. Further the - * algorithm compares the difference with that of a present value, - * if true the value is reported to the sub system. - */ - for (i = 0; i < fifocount - 1; i++) { - read = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff; - diff = abs(read - prev_val_x); - if (diff < prev_diff_x) { - prev_diff_x = diff; - *x = read; - } - prev_val_x = read; - - read = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff; - diff = abs(read - prev_val_y); - if (diff < prev_diff_y) { - prev_diff_y = diff; - *y = read; - } - prev_val_y = read; - } -} - -static irqreturn_t tscadc_irq(int irq, void *dev) -{ - struct tscadc *ts_dev = dev; - struct input_dev *input_dev = ts_dev->input; - unsigned int status, irqclr = 0; - unsigned int x = 0, y = 0; - unsigned int z1, z2, z; - unsigned int fsm; - - status = tscadc_readl(ts_dev, REG_IRQSTATUS); - if (status & IRQENB_FIFO0THRES) { - tscadc_read_coordinates(ts_dev, &x, &y); - - z1 = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff; - z2 = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff; - - if (ts_dev->pen_down && z1 != 0 && z2 != 0) { - /* - * Calculate pressure using formula - * Resistance(touch) = x plate resistance * - * x postion/4096 * ((z2 / z1) - 1) - */ - z = z2 - z1; - z *= x; - z *= ts_dev->x_plate_resistance; - z /= z1; - z = (z + 2047) >> 12; - - if (z <= MAX_12BIT) { - input_report_abs(input_dev, ABS_X, x); - input_report_abs(input_dev, ABS_Y, y); - input_report_abs(input_dev, ABS_PRESSURE, z); - input_report_key(input_dev, BTN_TOUCH, 1); - input_sync(input_dev); - } - } - irqclr |= IRQENB_FIFO0THRES; - } - - /* - * Time for sequencer to settle, to read - * correct state of the sequencer. - */ - udelay(SEQ_SETTLE); - - status = tscadc_readl(ts_dev, REG_RAWIRQSTATUS); - if (status & IRQENB_PENUP) { - /* Pen up event */ - fsm = tscadc_readl(ts_dev, REG_ADCFSM); - if (fsm == ADCFSM_STEPID) { - ts_dev->pen_down = false; - input_report_key(input_dev, BTN_TOUCH, 0); - input_report_abs(input_dev, ABS_PRESSURE, 0); - input_sync(input_dev); - } else { - ts_dev->pen_down = true; - } - irqclr |= IRQENB_PENUP; - } - - tscadc_writel(ts_dev, REG_IRQSTATUS, irqclr); - - tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB); - return IRQ_HANDLED; -} - -/* - * The functions for inserting/removing driver as a module. - */ - -static int __devinit tscadc_probe(struct platform_device *pdev) -{ - const struct tsc_data *pdata = pdev->dev.platform_data; - struct resource *res; - struct tscadc *ts_dev; - struct input_dev *input_dev; - struct clk *clk; - int err; - int clk_value, ctrl, irq; - - if (!pdata) { - dev_err(&pdev->dev, "missing platform data.\n"); - return -EINVAL; - } - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "no memory resource defined.\n"); - return -EINVAL; - } - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no irq ID is specified.\n"); - return -EINVAL; - } - - /* Allocate memory for device */ - ts_dev = kzalloc(sizeof(struct tscadc), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!ts_dev || !input_dev) { - dev_err(&pdev->dev, "failed to allocate memory.\n"); - err = -ENOMEM; - goto err_free_mem; - } - - ts_dev->input = input_dev; - ts_dev->irq = irq; - ts_dev->wires = pdata->wires; - ts_dev->x_plate_resistance = pdata->x_plate_resistance; - ts_dev->steps_to_configure = pdata->steps_to_configure; - - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (!res) { - dev_err(&pdev->dev, "failed to reserve registers.\n"); - err = -EBUSY; - goto err_free_mem; - } - - ts_dev->tsc_base = ioremap(res->start, resource_size(res)); - if (!ts_dev->tsc_base) { - dev_err(&pdev->dev, "failed to map registers.\n"); - err = -ENOMEM; - goto err_release_mem_region; - } - - err = request_irq(ts_dev->irq, tscadc_irq, - 0, pdev->dev.driver->name, ts_dev); - if (err) { - dev_err(&pdev->dev, "failed to allocate irq.\n"); - goto err_unmap_regs; - } - - ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick"); - if (IS_ERR(ts_dev->tsc_ick)) { - dev_err(&pdev->dev, "failed to get TSC ick\n"); - goto err_free_irq; - } - clk_enable(ts_dev->tsc_ick); - - clk = clk_get(&pdev->dev, "adc_tsc_fck"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "failed to get TSC fck\n"); - err = PTR_ERR(clk); - goto err_disable_clk; - } - - clk_value = clk_get_rate(clk) / ADC_CLK; - clk_put(clk); - - if (clk_value < 7) { - dev_err(&pdev->dev, "clock input less than min clock requirement\n"); - goto err_disable_clk; - } - /* CLKDIV needs to be configured to the value minus 1 */ - tscadc_writel(ts_dev, REG_CLKDIV, clk_value - 1); - - /* Enable wake-up of the SoC using touchscreen */ - tscadc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB); - - ctrl = CNTRLREG_STEPCONFIGWRT | - CNTRLREG_TSCENB | - CNTRLREG_STEPID; - switch (ts_dev->wires) { - case 4: - ctrl |= CNTRLREG_4WIRE; - break; - case 5: - ctrl |= CNTRLREG_5WIRE; - break; - case 8: - ctrl |= CNTRLREG_8WIRE; - break; - } - tscadc_writel(ts_dev, REG_CTRL, ctrl); - - tscadc_idle_config(ts_dev); - tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES); - tscadc_step_config(ts_dev); - tscadc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure); - - ctrl |= CNTRLREG_TSCSSENB; - tscadc_writel(ts_dev, REG_CTRL, ctrl); - - input_dev->name = "ti-tsc-adc"; - input_dev->dev.parent = &pdev->dev; - - input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); - - input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0); - input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0); - input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0); - - /* register to the input system */ - err = input_register_device(input_dev); - if (err) - goto err_disable_clk; - - platform_set_drvdata(pdev, ts_dev); - return 0; - -err_disable_clk: - clk_disable(ts_dev->tsc_ick); - clk_put(ts_dev->tsc_ick); -err_free_irq: - free_irq(ts_dev->irq, ts_dev); -err_unmap_regs: - iounmap(ts_dev->tsc_base); -err_release_mem_region: - release_mem_region(res->start, resource_size(res)); -err_free_mem: - input_free_device(input_dev); - kfree(ts_dev); - return err; -} - -static int __devexit tscadc_remove(struct platform_device *pdev) -{ - struct tscadc *ts_dev = platform_get_drvdata(pdev); - struct resource *res; - - free_irq(ts_dev->irq, ts_dev); - - input_unregister_device(ts_dev->input); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - iounmap(ts_dev->tsc_base); - release_mem_region(res->start, resource_size(res)); - - clk_disable(ts_dev->tsc_ick); - clk_put(ts_dev->tsc_ick); - - kfree(ts_dev); - - platform_set_drvdata(pdev, NULL); - return 0; -} - -static struct platform_driver ti_tsc_driver = { - .probe = tscadc_probe, - .remove = __devexit_p(tscadc_remove), - .driver = { - .name = "tsc", - .owner = THIS_MODULE, - }, -}; -module_platform_driver(ti_tsc_driver); - -MODULE_DESCRIPTION("TI touchscreen controller driver"); -MODULE_AUTHOR("Rachna Patil "); -MODULE_LICENSE("GPL"); diff --git a/include/linux/input/ti_am335x_tsc.h b/include/linux/input/ti_am335x_tsc.h new file mode 100644 index 000000000000..49269a2aa329 --- /dev/null +++ b/include/linux/input/ti_am335x_tsc.h @@ -0,0 +1,23 @@ +#ifndef __LINUX_TI_AM335X_TSC_H +#define __LINUX_TI_AM335X_TSC_H + +/** + * struct tsc_data Touchscreen wire configuration + * @wires: Wires refer to application modes + * i.e. 4/5/8 wire touchscreen support + * on the platform. + * @x_plate_resistance: X plate resistance. + * @steps_to_configure: The sequencer supports a total of + * 16 programmable steps. + * A step configured to read a single + * co-ordinate value, can be applied + * more number of times for better results. + */ + +struct tsc_data { + int wires; + int x_plate_resistance; + int steps_to_configure; +}; + +#endif diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h deleted file mode 100644 index ad442a3eb68b..000000000000 --- a/include/linux/input/ti_tscadc.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __LINUX_TI_TSCADC_H -#define __LINUX_TI_TSCADC_H - -/** - * struct tsc_data Touchscreen wire configuration - * @wires: Wires refer to application modes - * i.e. 4/5/8 wire touchscreen support - * on the platform. - * @x_plate_resistance: X plate resistance. - * @steps_to_configure: The sequencer supports a total of - * 16 programmable steps. - * A step configured to read a single - * co-ordinate value, can be applied - * more number of times for better results. - */ - -struct tsc_data { - int wires; - int x_plate_resistance; - int steps_to_configure; -}; - -#endif -- cgit v1.2.3 From 01636eb970a029897b06fb96026941429212ddd9 Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Tue, 16 Oct 2012 12:55:43 +0530 Subject: mfd: ti_tscadc: Add support for TI's TSC/ADC MFDevice Add the mfd core driver which supports touchscreen and ADC. With this patch we are only adding infrastructure to support the MFD clients. Signed-off-by: Patil, Rachna Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 11 ++ drivers/mfd/Makefile | 1 + drivers/mfd/ti_am335x_tscadc.c | 250 +++++++++++++++++++++++++++++++++++ include/linux/mfd/ti_am335x_tscadc.h | 137 +++++++++++++++++++ 4 files changed, 399 insertions(+) create mode 100644 drivers/mfd/ti_am335x_tscadc.c create mode 100644 include/linux/mfd/ti_am335x_tscadc.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index acab3ef8a310..9bba7f78ff36 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -94,6 +94,17 @@ config MFD_TI_SSP To compile this driver as a module, choose M here: the module will be called ti-ssp. +config MFD_TI_AM335X_TSCADC + tristate "TI ADC / Touch Screen chip support" + select MFD_CORE + select REGMAP + select REGMAP_MMIO + help + If you say yes here you get support for Texas Instruments series + of Touch Screen /ADC chips. + To compile this driver as a module, choose M here: the + module will be called ti_am335x_tscadc. + config HTC_EGPIO bool "HTC EGPIO support" depends on GENERIC_HARDIRQS && GPIOLIB && ARM diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d8ccb630ddb0..442c17e40741 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_HTC_I2CPLD) += htc-i2cpld.o obj-$(CONFIG_MFD_DAVINCI_VOICECODEC) += davinci_voicecodec.o obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o obj-$(CONFIG_MFD_TI_SSP) += ti-ssp.o +obj-$(CONFIG_MFD_TI_AM335X_TSCADC) += ti_am335x_tscadc.o obj-$(CONFIG_MFD_STA2X11) += sta2x11-mfd.o obj-$(CONFIG_MFD_STMPE) += stmpe.o diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c new file mode 100644 index 000000000000..14df67bc390f --- /dev/null +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -0,0 +1,250 @@ +/* + * TI Touch Screen / ADC MFD driver + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg) +{ + unsigned int val; + + regmap_read(tsadc->regmap_tscadc, reg, &val); + return val; +} + +static void tscadc_writel(struct ti_tscadc_dev *tsadc, unsigned int reg, + unsigned int val) +{ + regmap_write(tsadc->regmap_tscadc, reg, val); +} + +static const struct regmap_config tscadc_regmap_config = { + .name = "ti_tscadc", + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +}; + +static void tscadc_idle_config(struct ti_tscadc_dev *config) +{ + unsigned int idleconfig; + + idleconfig = STEPCONFIG_YNN | STEPCONFIG_INM_ADCREFM | + STEPCONFIG_INP_ADCREFM | STEPCONFIG_YPN; + + tscadc_writel(config, REG_IDLECONFIG, idleconfig); +} + +static int __devinit ti_tscadc_probe(struct platform_device *pdev) +{ + struct ti_tscadc_dev *tscadc; + struct resource *res; + struct clk *clk; + struct mfd_tscadc_board *pdata = pdev->dev.platform_data; + int irq; + int err, ctrl; + int clk_value, clock_rate; + + if (!pdata) { + dev_err(&pdev->dev, "Could not find platform data\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource defined.\n"); + return -EINVAL; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no irq ID is specified.\n"); + return -EINVAL; + } + + /* Allocate memory for device */ + tscadc = devm_kzalloc(&pdev->dev, + sizeof(struct ti_tscadc_dev), GFP_KERNEL); + if (!tscadc) { + dev_err(&pdev->dev, "failed to allocate memory.\n"); + return -ENOMEM; + } + tscadc->dev = &pdev->dev; + tscadc->irq = irq; + + res = devm_request_mem_region(&pdev->dev, + res->start, resource_size(res), pdev->name); + if (!res) { + dev_err(&pdev->dev, "failed to reserve registers.\n"); + err = -EBUSY; + goto err; + } + + tscadc->tscadc_base = devm_ioremap(&pdev->dev, + res->start, resource_size(res)); + if (!tscadc->tscadc_base) { + dev_err(&pdev->dev, "failed to map registers.\n"); + err = -ENOMEM; + goto err; + } + + tscadc->regmap_tscadc = devm_regmap_init_mmio(&pdev->dev, + tscadc->tscadc_base, &tscadc_regmap_config); + if (IS_ERR(tscadc->regmap_tscadc)) { + dev_err(&pdev->dev, "regmap init failed\n"); + err = PTR_ERR(tscadc->regmap_tscadc); + goto err; + } + + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + /* + * The TSC_ADC_Subsystem has 2 clock domains + * OCP_CLK and ADC_CLK. + * The ADC clock is expected to run at target of 3MHz, + * and expected to capture 12-bit data at a rate of 200 KSPS. + * The TSC_ADC_SS controller design assumes the OCP clock is + * at least 6x faster than the ADC clock. + */ + clk = clk_get(&pdev->dev, "adc_tsc_fck"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "failed to get TSC fck\n"); + err = PTR_ERR(clk); + goto err_disable_clk; + } + clock_rate = clk_get_rate(clk); + clk_put(clk); + clk_value = clock_rate / ADC_CLK; + if (clk_value < MAX_CLK_DIV) { + dev_err(&pdev->dev, "clock input less than min clock requirement\n"); + err = -EINVAL; + goto err_disable_clk; + } + /* TSCADC_CLKDIV needs to be configured to the value minus 1 */ + clk_value = clk_value - 1; + tscadc_writel(tscadc, REG_CLKDIV, clk_value); + + /* Set the control register bits */ + ctrl = CNTRLREG_STEPCONFIGWRT | + CNTRLREG_TSCENB | + CNTRLREG_STEPID | + CNTRLREG_4WIRE; + tscadc_writel(tscadc, REG_CTRL, ctrl); + + /* Set register bits for Idle Config Mode */ + tscadc_idle_config(tscadc); + + /* Enable the TSC module enable bit */ + ctrl = tscadc_readl(tscadc, REG_CTRL); + ctrl |= CNTRLREG_TSCSSENB; + tscadc_writel(tscadc, REG_CTRL, ctrl); + + err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells, + TSCADC_CELLS, NULL, 0, NULL); + if (err < 0) + goto err_disable_clk; + + device_init_wakeup(&pdev->dev, true); + platform_set_drvdata(pdev, tscadc); + + return 0; + +err_disable_clk: + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); +err: + return err; +} + +static int __devexit ti_tscadc_remove(struct platform_device *pdev) +{ + struct ti_tscadc_dev *tscadc = platform_get_drvdata(pdev); + + tscadc_writel(tscadc, REG_SE, 0x00); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + mfd_remove_devices(tscadc->dev); + + return 0; +} + +#ifdef CONFIG_PM +static int tscadc_suspend(struct device *dev) +{ + struct ti_tscadc_dev *tscadc_dev = dev_get_drvdata(dev); + + tscadc_writel(tscadc_dev, REG_SE, 0x00); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tscadc_resume(struct device *dev) +{ + struct ti_tscadc_dev *tscadc_dev = dev_get_drvdata(dev); + unsigned int restore, ctrl; + + pm_runtime_get_sync(dev); + + /* context restore */ + ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_TSCENB | + CNTRLREG_STEPID | CNTRLREG_4WIRE; + tscadc_writel(tscadc_dev, REG_CTRL, ctrl); + tscadc_idle_config(tscadc_dev); + tscadc_writel(tscadc_dev, REG_SE, STPENB_STEPENB); + restore = tscadc_readl(tscadc_dev, REG_CTRL); + tscadc_writel(tscadc_dev, REG_CTRL, + (restore | CNTRLREG_TSCSSENB)); + + return 0; +} + +static const struct dev_pm_ops tscadc_pm_ops = { + .suspend = tscadc_suspend, + .resume = tscadc_resume, +}; +#define TSCADC_PM_OPS (&tscadc_pm_ops) +#else +#define TSCADC_PM_OPS NULL +#endif + +static struct platform_driver ti_tscadc_driver = { + .driver = { + .name = "ti_tscadc", + .owner = THIS_MODULE, + .pm = TSCADC_PM_OPS, + }, + .probe = ti_tscadc_probe, + .remove = __devexit_p(ti_tscadc_remove), + +}; + +module_platform_driver(ti_tscadc_driver); + +MODULE_DESCRIPTION("TI touchscreen / ADC MFD controller driver"); +MODULE_AUTHOR("Rachna Patil "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h new file mode 100644 index 000000000000..b7232b157061 --- /dev/null +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -0,0 +1,137 @@ +#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H +#define __LINUX_TI_AM335X_TSCADC_MFD_H + +/* + * TI Touch Screen / ADC MFD driver + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include + +#define REG_RAWIRQSTATUS 0x024 +#define REG_IRQSTATUS 0x028 +#define REG_IRQENABLE 0x02C +#define REG_IRQCLR 0x030 +#define REG_IRQWAKEUP 0x034 +#define REG_CTRL 0x040 +#define REG_ADCFSM 0x044 +#define REG_CLKDIV 0x04C +#define REG_SE 0x054 +#define REG_IDLECONFIG 0x058 +#define REG_CHARGECONFIG 0x05C +#define REG_CHARGEDELAY 0x060 +#define REG_STEPCONFIG(n) (0x64 + ((n - 1) * 8)) +#define REG_STEPDELAY(n) (0x68 + ((n - 1) * 8)) +#define REG_FIFO0CNT 0xE4 +#define REG_FIFO0THR 0xE8 +#define REG_FIFO1CNT 0xF0 +#define REG_FIFO1THR 0xF4 +#define REG_FIFO0 0x100 +#define REG_FIFO1 0x200 + +/* Register Bitfields */ +/* IRQ wakeup enable */ +#define IRQWKUP_ENB BIT(0) + +/* Step Enable */ +#define STEPENB_MASK (0x1FFFF << 0) +#define STEPENB(val) ((val) << 0) +#define STPENB_STEPENB STEPENB(0x1FFFF) + +/* IRQ enable */ +#define IRQENB_HW_PEN BIT(0) +#define IRQENB_FIFO0THRES BIT(2) +#define IRQENB_FIFO1THRES BIT(5) +#define IRQENB_PENUP BIT(9) + +/* Step Configuration */ +#define STEPCONFIG_MODE_MASK (3 << 0) +#define STEPCONFIG_MODE(val) ((val) << 0) +#define STEPCONFIG_MODE_HWSYNC STEPCONFIG_MODE(2) +#define STEPCONFIG_AVG_MASK (7 << 2) +#define STEPCONFIG_AVG(val) ((val) << 2) +#define STEPCONFIG_AVG_16 STEPCONFIG_AVG(4) +#define STEPCONFIG_XPP BIT(5) +#define STEPCONFIG_XNN BIT(6) +#define STEPCONFIG_YPP BIT(7) +#define STEPCONFIG_YNN BIT(8) +#define STEPCONFIG_XNP BIT(9) +#define STEPCONFIG_YPN BIT(10) +#define STEPCONFIG_INM_MASK (0xF << 15) +#define STEPCONFIG_INM(val) ((val) << 15) +#define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) +#define STEPCONFIG_INP_MASK (0xF << 19) +#define STEPCONFIG_INP(val) ((val) << 19) +#define STEPCONFIG_INP_AN2 STEPCONFIG_INP(2) +#define STEPCONFIG_INP_AN3 STEPCONFIG_INP(3) +#define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) +#define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) +#define STEPCONFIG_FIFO1 BIT(26) + +/* Delay register */ +#define STEPDELAY_OPEN_MASK (0x3FFFF << 0) +#define STEPDELAY_OPEN(val) ((val) << 0) +#define STEPCONFIG_OPENDLY STEPDELAY_OPEN(0x098) +#define STEPDELAY_SAMPLE_MASK (0xFF << 24) +#define STEPDELAY_SAMPLE(val) ((val) << 24) +#define STEPCONFIG_SAMPLEDLY STEPDELAY_SAMPLE(0) + +/* Charge Config */ +#define STEPCHARGE_RFP_MASK (7 << 12) +#define STEPCHARGE_RFP(val) ((val) << 12) +#define STEPCHARGE_RFP_XPUL STEPCHARGE_RFP(1) +#define STEPCHARGE_INM_MASK (0xF << 15) +#define STEPCHARGE_INM(val) ((val) << 15) +#define STEPCHARGE_INM_AN1 STEPCHARGE_INM(1) +#define STEPCHARGE_INP_MASK (0xF << 19) +#define STEPCHARGE_INP(val) ((val) << 19) +#define STEPCHARGE_INP_AN1 STEPCHARGE_INP(1) +#define STEPCHARGE_RFM_MASK (3 << 23) +#define STEPCHARGE_RFM(val) ((val) << 23) +#define STEPCHARGE_RFM_XNUR STEPCHARGE_RFM(1) + +/* Charge delay */ +#define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) +#define CHARGEDLY_OPEN(val) ((val) << 0) +#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(1) + +/* Control register */ +#define CNTRLREG_TSCSSENB BIT(0) +#define CNTRLREG_STEPID BIT(1) +#define CNTRLREG_STEPCONFIGWRT BIT(2) +#define CNTRLREG_POWERDOWN BIT(4) +#define CNTRLREG_AFE_CTRL_MASK (3 << 5) +#define CNTRLREG_AFE_CTRL(val) ((val) << 5) +#define CNTRLREG_4WIRE CNTRLREG_AFE_CTRL(1) +#define CNTRLREG_5WIRE CNTRLREG_AFE_CTRL(2) +#define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) +#define CNTRLREG_TSCENB BIT(7) + +#define ADC_CLK 3000000 +#define MAX_CLK_DIV 7 + +#define TSCADC_CELLS 0 + +struct mfd_tscadc_board { + struct tsc_data *tsc_init; +}; + +struct ti_tscadc_dev { + struct device *dev; + struct regmap *regmap_tscadc; + void __iomem *tscadc_base; + int irq; + struct mfd_cell cells[TSCADC_CELLS]; +}; + +#endif -- cgit v1.2.3 From 2b99bafab19145a72e2c557326fc4662a864a162 Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Tue, 16 Oct 2012 12:55:44 +0530 Subject: input: TSC: ti_tsc: Convert TSC into a MFDevice This patch converts touchscreen into a MFD client. All the register definitions, clock initialization, etc has been moved to MFD core driver. Signed-off-by: Patil, Rachna Signed-off-by: Samuel Ortiz --- drivers/input/touchscreen/Kconfig | 2 +- drivers/input/touchscreen/ti_am335x_tsc.c | 318 +++++++++--------------------- drivers/mfd/ti_am335x_tscadc.c | 11 ++ include/linux/mfd/ti_am335x_tscadc.h | 10 +- 4 files changed, 118 insertions(+), 223 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index d31dc5faeaaf..0c45caddd41c 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -531,7 +531,7 @@ config TOUCHSCREEN_TOUCHWIN config TOUCHSCREEN_TI_AM335X_TSC tristate "TI Touchscreen Interface" - depends on ARCH_OMAP2PLUS + depends on MFD_TI_AM335X_TSCADC help Say Y here if you have 4/5/8 wire touchscreen controller to be connected to the ADC controller on your TI AM335x SoC. diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index 462950acb97b..7a18a8a15228 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -27,106 +27,15 @@ #include #include -#define REG_RAWIRQSTATUS 0x024 -#define REG_IRQSTATUS 0x028 -#define REG_IRQENABLE 0x02C -#define REG_IRQWAKEUP 0x034 -#define REG_CTRL 0x040 -#define REG_ADCFSM 0x044 -#define REG_CLKDIV 0x04C -#define REG_SE 0x054 -#define REG_IDLECONFIG 0x058 -#define REG_CHARGECONFIG 0x05C -#define REG_CHARGEDELAY 0x060 -#define REG_STEPCONFIG(n) (0x64 + ((n - 1) * 8)) -#define REG_STEPDELAY(n) (0x68 + ((n - 1) * 8)) -#define REG_FIFO0CNT 0xE4 -#define REG_FIFO0THR 0xE8 -#define REG_FIFO1THR 0xF4 -#define REG_FIFO0 0x100 -#define REG_FIFO1 0x200 - -/* Register Bitfields */ -#define IRQWKUP_ENB BIT(0) - -/* Step Enable */ -#define STEPENB_MASK (0x1FFFF << 0) -#define STEPENB(val) (val << 0) -#define STPENB_STEPENB STEPENB(0x7FFF) - -/* IRQ enable */ -#define IRQENB_FIFO0THRES BIT(2) -#define IRQENB_FIFO1THRES BIT(5) -#define IRQENB_PENUP BIT(9) - -/* Step Configuration */ -#define STEPCONFIG_MODE_MASK (3 << 0) -#define STEPCONFIG_MODE(val) (val << 0) -#define STEPCONFIG_MODE_HWSYNC STEPCONFIG_MODE(2) -#define STEPCONFIG_AVG_MASK (7 << 2) -#define STEPCONFIG_AVG(val) (val << 2) -#define STEPCONFIG_AVG_16 STEPCONFIG_AVG(4) -#define STEPCONFIG_XPP BIT(5) -#define STEPCONFIG_XNN BIT(6) -#define STEPCONFIG_YPP BIT(7) -#define STEPCONFIG_YNN BIT(8) -#define STEPCONFIG_XNP BIT(9) -#define STEPCONFIG_YPN BIT(10) -#define STEPCONFIG_INM_MASK (0xF << 15) -#define STEPCONFIG_INM(val) (val << 15) -#define STEPCONFIG_INM_ADCREFM STEPCONFIG_INM(8) -#define STEPCONFIG_INP_MASK (0xF << 19) -#define STEPCONFIG_INP(val) (val << 19) -#define STEPCONFIG_INP_AN2 STEPCONFIG_INP(2) -#define STEPCONFIG_INP_AN3 STEPCONFIG_INP(3) -#define STEPCONFIG_INP_AN4 STEPCONFIG_INP(4) -#define STEPCONFIG_INP_ADCREFM STEPCONFIG_INP(8) -#define STEPCONFIG_FIFO1 BIT(26) - -/* Delay register */ -#define STEPDELAY_OPEN_MASK (0x3FFFF << 0) -#define STEPDELAY_OPEN(val) (val << 0) -#define STEPCONFIG_OPENDLY STEPDELAY_OPEN(0x098) - -/* Charge Config */ -#define STEPCHARGE_RFP_MASK (7 << 12) -#define STEPCHARGE_RFP(val) (val << 12) -#define STEPCHARGE_RFP_XPUL STEPCHARGE_RFP(1) -#define STEPCHARGE_INM_MASK (0xF << 15) -#define STEPCHARGE_INM(val) (val << 15) -#define STEPCHARGE_INM_AN1 STEPCHARGE_INM(1) -#define STEPCHARGE_INP_MASK (0xF << 19) -#define STEPCHARGE_INP(val) (val << 19) -#define STEPCHARGE_INP_AN1 STEPCHARGE_INP(1) -#define STEPCHARGE_RFM_MASK (3 << 23) -#define STEPCHARGE_RFM(val) (val << 23) -#define STEPCHARGE_RFM_XNUR STEPCHARGE_RFM(1) - -/* Charge delay */ -#define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) -#define CHARGEDLY_OPEN(val) (val << 0) -#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(1) - -/* Control register */ -#define CNTRLREG_TSCSSENB BIT(0) -#define CNTRLREG_STEPID BIT(1) -#define CNTRLREG_STEPCONFIGWRT BIT(2) -#define CNTRLREG_AFE_CTRL_MASK (3 << 5) -#define CNTRLREG_AFE_CTRL(val) (val << 5) -#define CNTRLREG_4WIRE CNTRLREG_AFE_CTRL(1) -#define CNTRLREG_5WIRE CNTRLREG_AFE_CTRL(2) -#define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) -#define CNTRLREG_TSCENB BIT(7) +#include #define ADCFSM_STEPID 0x10 #define SEQ_SETTLE 275 -#define ADC_CLK 3000000 #define MAX_12BIT ((1 << 12) - 1) struct titsc { struct input_dev *input; - struct clk *tsc_ick; - void __iomem *tsc_base; + struct ti_tscadc_dev *mfd_tscadc; unsigned int irq; unsigned int wires; unsigned int x_plate_resistance; @@ -136,13 +45,13 @@ struct titsc { static unsigned int titsc_readl(struct titsc *ts, unsigned int reg) { - return readl(ts->tsc_base + reg); + return readl(ts->mfd_tscadc->tscadc_base + reg); } static void titsc_writel(struct titsc *tsc, unsigned int reg, unsigned int val) { - writel(val, tsc->tsc_base + reg); + writel(val, tsc->mfd_tscadc->tscadc_base + reg); } static void titsc_step_config(struct titsc *ts_dev) @@ -219,17 +128,7 @@ static void titsc_step_config(struct titsc *ts_dev) titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2), STEPCONFIG_OPENDLY); - titsc_writel(ts_dev, REG_SE, STPENB_STEPENB); -} - -static void titsc_idle_config(struct titsc *ts_config) -{ - unsigned int idleconfig; - - idleconfig = STEPCONFIG_YNN | - STEPCONFIG_INM_ADCREFM | - STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM; - titsc_writel(ts_config, REG_IDLECONFIG, idleconfig); + titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC); } static void titsc_read_coordinates(struct titsc *ts_dev, @@ -239,7 +138,7 @@ static void titsc_read_coordinates(struct titsc *ts_dev, unsigned int prev_val_x = ~0, prev_val_y = ~0; unsigned int prev_diff_x = ~0, prev_diff_y = ~0; unsigned int read, diff; - unsigned int i; + unsigned int i, channel; /* * Delta filter is used to remove large variations in sampled @@ -250,21 +149,32 @@ static void titsc_read_coordinates(struct titsc *ts_dev, * if true the value is reported to the sub system. */ for (i = 0; i < fifocount - 1; i++) { - read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff; - diff = abs(read - prev_val_x); - if (diff < prev_diff_x) { - prev_diff_x = diff; - *x = read; + read = titsc_readl(ts_dev, REG_FIFO0); + channel = read & 0xf0000; + channel = channel >> 0x10; + if ((channel >= 0) && (channel < ts_dev->steps_to_configure)) { + read &= 0xfff; + diff = abs(read - prev_val_x); + if (diff < prev_diff_x) { + prev_diff_x = diff; + *x = read; + } + prev_val_x = read; } - prev_val_x = read; - read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff; - diff = abs(read - prev_val_y); - if (diff < prev_diff_y) { - prev_diff_y = diff; - *y = read; + read = titsc_readl(ts_dev, REG_FIFO1); + channel = read & 0xf0000; + channel = channel >> 0x10; + if ((channel >= ts_dev->steps_to_configure) && + (channel < (2 * ts_dev->steps_to_configure - 1))) { + read &= 0xfff; + diff = abs(read - prev_val_y); + if (diff < prev_diff_y) { + prev_diff_y = diff; + *y = read; + } + prev_val_y = read; } - prev_val_y = read; } } @@ -276,6 +186,8 @@ static irqreturn_t titsc_irq(int irq, void *dev) unsigned int x = 0, y = 0; unsigned int z1, z2, z; unsigned int fsm; + unsigned int fifo1count, fifo0count; + int i; status = titsc_readl(ts_dev, REG_IRQSTATUS); if (status & IRQENB_FIFO0THRES) { @@ -284,6 +196,14 @@ static irqreturn_t titsc_irq(int irq, void *dev) z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff; z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff; + fifo1count = titsc_readl(ts_dev, REG_FIFO1CNT); + for (i = 0; i < fifo1count; i++) + titsc_readl(ts_dev, REG_FIFO1); + + fifo0count = titsc_readl(ts_dev, REG_FIFO0CNT); + for (i = 0; i < fifo0count; i++) + titsc_readl(ts_dev, REG_FIFO0); + if (ts_dev->pen_down && z1 != 0 && z2 != 0) { /* * Calculate pressure using formula @@ -330,7 +250,7 @@ static irqreturn_t titsc_irq(int irq, void *dev) titsc_writel(ts_dev, REG_IRQSTATUS, irqclr); - titsc_writel(ts_dev, REG_SE, STPENB_STEPENB); + titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC); return IRQ_HANDLED; } @@ -340,28 +260,16 @@ static irqreturn_t titsc_irq(int irq, void *dev) static int __devinit titsc_probe(struct platform_device *pdev) { - const struct tsc_data *pdata = pdev->dev.platform_data; - struct resource *res; struct titsc *ts_dev; struct input_dev *input_dev; - struct clk *clk; + struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data; + struct mfd_tscadc_board *pdata; int err; - int clk_value, ctrl, irq; - if (!pdata) { - dev_err(&pdev->dev, "missing platform data.\n"); - return -EINVAL; - } - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "no memory resource defined.\n"); - return -EINVAL; - } + pdata = tscadc_dev->dev->platform_data; - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no irq ID is specified.\n"); + if (!pdata) { + dev_err(&pdev->dev, "Could not find platform data\n"); return -EINVAL; } @@ -374,85 +282,26 @@ static int __devinit titsc_probe(struct platform_device *pdev) goto err_free_mem; } + tscadc_dev->tsc = ts_dev; + ts_dev->mfd_tscadc = tscadc_dev; ts_dev->input = input_dev; - ts_dev->irq = irq; - ts_dev->wires = pdata->wires; - ts_dev->x_plate_resistance = pdata->x_plate_resistance; - ts_dev->steps_to_configure = pdata->steps_to_configure; - - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (!res) { - dev_err(&pdev->dev, "failed to reserve registers.\n"); - err = -EBUSY; - goto err_free_mem; - } - - ts_dev->tsc_base = ioremap(res->start, resource_size(res)); - if (!ts_dev->tsc_base) { - dev_err(&pdev->dev, "failed to map registers.\n"); - err = -ENOMEM; - goto err_release_mem_region; - } + ts_dev->irq = tscadc_dev->irq; + ts_dev->wires = pdata->tsc_init->wires; + ts_dev->x_plate_resistance = pdata->tsc_init->x_plate_resistance; + ts_dev->steps_to_configure = pdata->tsc_init->steps_to_configure; err = request_irq(ts_dev->irq, titsc_irq, 0, pdev->dev.driver->name, ts_dev); if (err) { dev_err(&pdev->dev, "failed to allocate irq.\n"); - goto err_unmap_regs; - } - - ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick"); - if (IS_ERR(ts_dev->tsc_ick)) { - dev_err(&pdev->dev, "failed to get TSC ick\n"); - goto err_free_irq; - } - clk_enable(ts_dev->tsc_ick); - - clk = clk_get(&pdev->dev, "adc_tsc_fck"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "failed to get TSC fck\n"); - err = PTR_ERR(clk); - goto err_disable_clk; - } - - clk_value = clk_get_rate(clk) / ADC_CLK; - clk_put(clk); - - if (clk_value < 7) { - dev_err(&pdev->dev, "clock input less than min clock requirement\n"); - goto err_disable_clk; - } - /* CLKDIV needs to be configured to the value minus 1 */ - titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1); - - /* Enable wake-up of the SoC using touchscreen */ - titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB); - - ctrl = CNTRLREG_STEPCONFIGWRT | - CNTRLREG_TSCENB | - CNTRLREG_STEPID; - switch (ts_dev->wires) { - case 4: - ctrl |= CNTRLREG_4WIRE; - break; - case 5: - ctrl |= CNTRLREG_5WIRE; - break; - case 8: - ctrl |= CNTRLREG_8WIRE; - break; + goto err_free_mem; } - titsc_writel(ts_dev, REG_CTRL, ctrl); - titsc_idle_config(ts_dev); titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES); titsc_step_config(ts_dev); titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure); - ctrl |= CNTRLREG_TSCSSENB; - titsc_writel(ts_dev, REG_CTRL, ctrl); - - input_dev->name = "ti-tsc-adc"; + input_dev->name = "ti-tsc"; input_dev->dev.parent = &pdev->dev; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); @@ -465,20 +314,13 @@ static int __devinit titsc_probe(struct platform_device *pdev) /* register to the input system */ err = input_register_device(input_dev); if (err) - goto err_disable_clk; + goto err_free_irq; platform_set_drvdata(pdev, ts_dev); return 0; -err_disable_clk: - clk_disable(ts_dev->tsc_ick); - clk_put(ts_dev->tsc_ick); err_free_irq: free_irq(ts_dev->irq, ts_dev); -err_unmap_regs: - iounmap(ts_dev->tsc_base); -err_release_mem_region: - release_mem_region(res->start, resource_size(res)); err_free_mem: input_free_device(input_dev); kfree(ts_dev); @@ -487,32 +329,66 @@ err_free_mem: static int __devexit titsc_remove(struct platform_device *pdev) { - struct titsc *ts_dev = platform_get_drvdata(pdev); - struct resource *res; + struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data; + struct titsc *ts_dev = tscadc_dev->tsc; free_irq(ts_dev->irq, ts_dev); input_unregister_device(ts_dev->input); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - iounmap(ts_dev->tsc_base); - release_mem_region(res->start, resource_size(res)); + platform_set_drvdata(pdev, NULL); + kfree(ts_dev); + return 0; +} - clk_disable(ts_dev->tsc_ick); - clk_put(ts_dev->tsc_ick); +#ifdef CONFIG_PM +static int titsc_suspend(struct device *dev) +{ + struct ti_tscadc_dev *tscadc_dev = dev->platform_data; + struct titsc *ts_dev = tscadc_dev->tsc; + unsigned int idle; + + if (device_may_wakeup(tscadc_dev->dev)) { + idle = titsc_readl(ts_dev, REG_IRQENABLE); + titsc_writel(ts_dev, REG_IRQENABLE, + (idle | IRQENB_HW_PEN)); + titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB); + } + return 0; +} - kfree(ts_dev); +static int titsc_resume(struct device *dev) +{ + struct ti_tscadc_dev *tscadc_dev = dev->platform_data; + struct titsc *ts_dev = tscadc_dev->tsc; - platform_set_drvdata(pdev, NULL); + if (device_may_wakeup(tscadc_dev->dev)) { + titsc_writel(ts_dev, REG_IRQWAKEUP, + 0x00); + titsc_writel(ts_dev, REG_IRQCLR, IRQENB_HW_PEN); + } + titsc_step_config(ts_dev); + titsc_writel(ts_dev, REG_FIFO0THR, + ts_dev->steps_to_configure); return 0; } +static const struct dev_pm_ops titsc_pm_ops = { + .suspend = titsc_suspend, + .resume = titsc_resume, +}; +#define TITSC_PM_OPS (&titsc_pm_ops) +#else +#define TITSC_PM_OPS NULL +#endif + static struct platform_driver ti_tsc_driver = { .probe = titsc_probe, .remove = __devexit_p(titsc_remove), .driver = { .name = "tsc", .owner = THIS_MODULE, + .pm = TITSC_PM_OPS, }, }; module_platform_driver(ti_tsc_driver); diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 14df67bc390f..d812be4b61df 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -24,6 +24,7 @@ #include #include +#include static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg) { @@ -62,15 +63,19 @@ static int __devinit ti_tscadc_probe(struct platform_device *pdev) struct resource *res; struct clk *clk; struct mfd_tscadc_board *pdata = pdev->dev.platform_data; + struct mfd_cell *cell; int irq; int err, ctrl; int clk_value, clock_rate; + int tsc_wires; if (!pdata) { dev_err(&pdev->dev, "Could not find platform data\n"); return -EINVAL; } + tsc_wires = pdata->tsc_init->wires; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "no memory resource defined.\n"); @@ -161,6 +166,12 @@ static int __devinit ti_tscadc_probe(struct platform_device *pdev) ctrl |= CNTRLREG_TSCSSENB; tscadc_writel(tscadc, REG_CTRL, ctrl); + /* TSC Cell */ + cell = &tscadc->cells[TSC_CELL]; + cell->name = "tsc"; + cell->platform_data = tscadc; + cell->pdata_size = sizeof(*tscadc); + err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells, TSCADC_CELLS, NULL, 0, NULL); if (err < 0) diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index b7232b157061..fc18b2ef753f 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -47,6 +47,7 @@ #define STEPENB_MASK (0x1FFFF << 0) #define STEPENB(val) ((val) << 0) #define STPENB_STEPENB STEPENB(0x1FFFF) +#define STPENB_STEPENB_TC STEPENB(0x1FFF) /* IRQ enable */ #define IRQENB_HW_PEN BIT(0) @@ -120,7 +121,11 @@ #define ADC_CLK 3000000 #define MAX_CLK_DIV 7 -#define TSCADC_CELLS 0 +#define TSCADC_CELLS 1 + +enum tscadc_cells { + TSC_CELL, +}; struct mfd_tscadc_board { struct tsc_data *tsc_init; @@ -132,6 +137,9 @@ struct ti_tscadc_dev { void __iomem *tscadc_base; int irq; struct mfd_cell cells[TSCADC_CELLS]; + + /* tsc device */ + struct titsc *tsc; }; #endif -- cgit v1.2.3 From 5e53a69b44e893227b046a7bc74db3cb40d7f39b Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Tue, 16 Oct 2012 12:55:45 +0530 Subject: IIO : ADC: tiadc: Add support of TI's ADC driver This patch adds support for TI's ADC driver. This is a multifunctional device. Analog input lines are provided on which voltage measurements can be carried out. You can have upto 8 input lines. Signed-off-by: Patil, Rachna Acked-by: Jonathan Cameron Signed-off-by: Samuel Ortiz --- drivers/iio/adc/Kconfig | 7 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ti_am335x_adc.c | 260 ++++++++++++++++++++++++++++ drivers/mfd/ti_am335x_tscadc.c | 18 +- include/linux/mfd/ti_am335x_tscadc.h | 9 +- include/linux/platform_data/ti_am335x_adc.h | 14 ++ 6 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 drivers/iio/adc/ti_am335x_adc.c create mode 100644 include/linux/platform_data/ti_am335x_adc.h (limited to 'include/linux') diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 492758120338..1401ed1af39f 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -60,4 +60,11 @@ config LP8788_ADC help Say yes here to build support for TI LP8788 ADC. +config TI_AM335X_ADC + tristate "TI's ADC driver" + depends on MFD_TI_AM335X_TSCADC + help + Say yes here to build support for Texas Instruments ADC + driver which is also a MFD client. + endmenu diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 900995d5e179..4410a90fc84b 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_AD7476) += ad7476.o obj-$(CONFIG_AD7791) += ad7791.o obj-$(CONFIG_AT91_ADC) += at91_adc.o obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o +obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c new file mode 100644 index 000000000000..02a43c87a8a3 --- /dev/null +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -0,0 +1,260 @@ +/* + * TI ADC MFD driver + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct tiadc_device { + struct ti_tscadc_dev *mfd_tscadc; + int channels; +}; + +static unsigned int tiadc_readl(struct tiadc_device *adc, unsigned int reg) +{ + return readl(adc->mfd_tscadc->tscadc_base + reg); +} + +static void tiadc_writel(struct tiadc_device *adc, unsigned int reg, + unsigned int val) +{ + writel(val, adc->mfd_tscadc->tscadc_base + reg); +} + +static void tiadc_step_config(struct tiadc_device *adc_dev) +{ + unsigned int stepconfig; + int i, channels = 0, steps; + + /* + * There are 16 configurable steps and 8 analog input + * lines available which are shared between Touchscreen and ADC. + * + * Steps backwards i.e. from 16 towards 0 are used by ADC + * depending on number of input lines needed. + * Channel would represent which analog input + * needs to be given to ADC to digitalize data. + */ + + steps = TOTAL_STEPS - adc_dev->channels; + channels = TOTAL_CHANNELS - adc_dev->channels; + + stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1; + + for (i = (steps + 1); i <= TOTAL_STEPS; i++) { + tiadc_writel(adc_dev, REG_STEPCONFIG(i), + stepconfig | STEPCONFIG_INP(channels)); + tiadc_writel(adc_dev, REG_STEPDELAY(i), + STEPCONFIG_OPENDLY); + channels++; + } + tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB); +} + +static int tiadc_channel_init(struct iio_dev *indio_dev, int channels) +{ + struct iio_chan_spec *chan_array; + int i; + + indio_dev->num_channels = channels; + chan_array = kcalloc(indio_dev->num_channels, + sizeof(struct iio_chan_spec), GFP_KERNEL); + + if (chan_array == NULL) + return -ENOMEM; + + for (i = 0; i < (indio_dev->num_channels); i++) { + struct iio_chan_spec *chan = chan_array + i; + chan->type = IIO_VOLTAGE; + chan->indexed = 1; + chan->channel = i; + chan->info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT; + } + + indio_dev->channels = chan_array; + + return indio_dev->num_channels; +} + +static void tiadc_channels_remove(struct iio_dev *indio_dev) +{ + kfree(indio_dev->channels); +} + +static int tiadc_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct tiadc_device *adc_dev = iio_priv(indio_dev); + int i; + unsigned int fifo1count, readx1; + + /* + * When the sub-system is first enabled, + * the sequencer will always start with the + * lowest step (1) and continue until step (16). + * For ex: If we have enabled 4 ADC channels and + * currently use only 1 out of them, the + * sequencer still configures all the 4 steps, + * leading to 3 unwanted data. + * Hence we need to flush out this data. + */ + + fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); + for (i = 0; i < fifo1count; i++) { + readx1 = tiadc_readl(adc_dev, REG_FIFO1); + if (i == chan->channel) + *val = readx1 & 0xfff; + } + tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB); + + return IIO_VAL_INT; +} + +static const struct iio_info tiadc_info = { + .read_raw = &tiadc_read_raw, +}; + +static int __devinit tiadc_probe(struct platform_device *pdev) +{ + struct iio_dev *indio_dev; + struct tiadc_device *adc_dev; + struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data; + struct mfd_tscadc_board *pdata; + int err; + + pdata = tscadc_dev->dev->platform_data; + if (!pdata || !pdata->adc_init) { + dev_err(&pdev->dev, "Could not find platform data\n"); + return -EINVAL; + } + + indio_dev = iio_device_alloc(sizeof(struct tiadc_device)); + if (indio_dev == NULL) { + dev_err(&pdev->dev, "failed to allocate iio device\n"); + err = -ENOMEM; + goto err_ret; + } + adc_dev = iio_priv(indio_dev); + + adc_dev->mfd_tscadc = tscadc_dev; + adc_dev->channels = pdata->adc_init->adc_channels; + + indio_dev->dev.parent = &pdev->dev; + indio_dev->name = dev_name(&pdev->dev); + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &tiadc_info; + + tiadc_step_config(adc_dev); + + err = tiadc_channel_init(indio_dev, adc_dev->channels); + if (err < 0) + goto err_free_device; + + err = iio_device_register(indio_dev); + if (err) + goto err_free_channels; + + platform_set_drvdata(pdev, indio_dev); + + return 0; + +err_free_channels: + tiadc_channels_remove(indio_dev); +err_free_device: + iio_device_free(indio_dev); +err_ret: + return err; +} + +static int __devexit tiadc_remove(struct platform_device *pdev) +{ + struct iio_dev *indio_dev = platform_get_drvdata(pdev); + + iio_device_unregister(indio_dev); + tiadc_channels_remove(indio_dev); + + iio_device_free(indio_dev); + + return 0; +} + +#ifdef CONFIG_PM +static int tiadc_suspend(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct ti_tscadc_dev *tscadc_dev = dev->platform_data; + unsigned int idle; + + if (!device_may_wakeup(tscadc_dev->dev)) { + idle = tiadc_readl(adc_dev, REG_CTRL); + idle &= ~(CNTRLREG_TSCSSENB); + tiadc_writel(adc_dev, REG_CTRL, (idle | + CNTRLREG_POWERDOWN)); + } + + return 0; +} + +static int tiadc_resume(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct tiadc_device *adc_dev = iio_priv(indio_dev); + unsigned int restore; + + /* Make sure ADC is powered up */ + restore = tiadc_readl(adc_dev, REG_CTRL); + restore &= ~(CNTRLREG_POWERDOWN); + tiadc_writel(adc_dev, REG_CTRL, restore); + + tiadc_step_config(adc_dev); + + return 0; +} + +static const struct dev_pm_ops tiadc_pm_ops = { + .suspend = tiadc_suspend, + .resume = tiadc_resume, +}; +#define TIADC_PM_OPS (&tiadc_pm_ops) +#else +#define TIADC_PM_OPS NULL +#endif + +static struct platform_driver tiadc_driver = { + .driver = { + .name = "tiadc", + .owner = THIS_MODULE, + .pm = TIADC_PM_OPS, + }, + .probe = tiadc_probe, + .remove = __devexit_p(tiadc_remove), +}; + +module_platform_driver(tiadc_driver); + +MODULE_DESCRIPTION("TI ADC controller driver"); +MODULE_AUTHOR("Rachna Patil "); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index d812be4b61df..e947dd8bbcc3 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -25,6 +25,7 @@ #include #include +#include static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg) { @@ -67,14 +68,23 @@ static int __devinit ti_tscadc_probe(struct platform_device *pdev) int irq; int err, ctrl; int clk_value, clock_rate; - int tsc_wires; + int tsc_wires, adc_channels = 0, total_channels; if (!pdata) { dev_err(&pdev->dev, "Could not find platform data\n"); return -EINVAL; } + if (pdata->adc_init) + adc_channels = pdata->adc_init->adc_channels; + tsc_wires = pdata->tsc_init->wires; + total_channels = tsc_wires + adc_channels; + + if (total_channels > 8) { + dev_err(&pdev->dev, "Number of i/p channels more than 8\n"); + return -EINVAL; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -172,6 +182,12 @@ static int __devinit ti_tscadc_probe(struct platform_device *pdev) cell->platform_data = tscadc; cell->pdata_size = sizeof(*tscadc); + /* ADC Cell */ + cell = &tscadc->cells[ADC_CELL]; + cell->name = "tiadc"; + cell->platform_data = tscadc; + cell->pdata_size = sizeof(*tscadc); + err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells, TSCADC_CELLS, NULL, 0, NULL); if (err < 0) diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index fc18b2ef753f..c79ad5d2f271 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -120,15 +120,19 @@ #define ADC_CLK 3000000 #define MAX_CLK_DIV 7 +#define TOTAL_STEPS 16 +#define TOTAL_CHANNELS 8 -#define TSCADC_CELLS 1 +#define TSCADC_CELLS 2 enum tscadc_cells { TSC_CELL, + ADC_CELL, }; struct mfd_tscadc_board { struct tsc_data *tsc_init; + struct adc_data *adc_init; }; struct ti_tscadc_dev { @@ -140,6 +144,9 @@ struct ti_tscadc_dev { /* tsc device */ struct titsc *tsc; + + /* adc device */ + struct adc_device *adc; }; #endif diff --git a/include/linux/platform_data/ti_am335x_adc.h b/include/linux/platform_data/ti_am335x_adc.h new file mode 100644 index 000000000000..e41d5834cb84 --- /dev/null +++ b/include/linux/platform_data/ti_am335x_adc.h @@ -0,0 +1,14 @@ +#ifndef __LINUX_TI_AM335X_ADC_H +#define __LINUX_TI_AM335X_ADC_H + +/** + * struct adc_data ADC Input information + * @adc_channels: Number of analog inputs + * available for ADC. + */ + +struct adc_data { + unsigned int adc_channels; +}; + +#endif -- cgit v1.2.3 From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 29 Aug 2012 10:10:10 -0400 Subject: exportfs: add FILEID_INVALID to indicate invalid fid_type This commit adds FILEID_INVALID = 0xff in fid_type to indicate invalid fid_type It avoids using magic number 255 Signed-off-by: Namjae Jeon Signed-off-by: Vivek Trivedi Signed-off-by: J. Bruce Fields --- fs/exportfs/expfs.c | 4 ++-- fs/fhandle.c | 2 +- fs/nfsd/nfsfh.c | 4 ++-- include/linux/exportfs.h | 5 +++++ 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 29ab099e3e08..f1f1c59c2966 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid, if (parent && (len < 4)) { *max_len = 4; - return 255; + return FILEID_INVALID; } else if (len < 2) { *max_len = 2; - return 255; + return FILEID_INVALID; } len = 2; diff --git a/fs/fhandle.c b/fs/fhandle.c index f775bfdd6e4a..26f12b95702a 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path, handle_bytes = handle_dwords * sizeof(u32); handle->handle_bytes = handle_bytes; if ((handle->handle_bytes > f_handle.handle_bytes) || - (retval == 255) || (retval == -ENOSPC)) { + (retval == FILEID_INVALID) || (retval == -ENOSPC)) { /* As per old exportfs_encode_fh documentation * we could return ENOSPC to indicate overflow * But file system returned 255 always. So handle diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 032af381b3aa..814afaa4458a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) { + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); return nfserr_opnotsupp; } @@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp) goto out; _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } out: diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 12291a7ee275..0e1452546300 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -83,6 +83,11 @@ enum fid_type { * 64 bit parent inode number. */ FILEID_NILFS_WITH_PARENT = 0x62, + + /* + * Filesystems must not use 0xff file ID. + */ + FILEID_INVALID = 0xff, }; struct fid { -- cgit v1.2.3 From ada8a8a13b13a2749818524a7949935f68d2b3eb Mon Sep 17 00:00:00 2001 From: Wei WANG Date: Mon, 29 Oct 2012 13:49:33 +0800 Subject: mfd: Add realtek pcie card reader driver Realtek PCI-E card reader driver adapts requests from upper-level sdmmc/memstick layer to the real physical card reader. Signed-off-by: Wei WANG Reviewed-by: Arnd Bergmann Tested-by: Borislav Petkov Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 + drivers/mfd/Makefile | 3 + drivers/mfd/rtl8411.c | 251 ++++++++ drivers/mfd/rts5209.c | 223 +++++++ drivers/mfd/rts5229.c | 205 +++++++ drivers/mfd/rtsx_pcr.c | 1251 +++++++++++++++++++++++++++++++++++++++ drivers/mfd/rtsx_pcr.h | 32 + include/linux/mfd/rtsx_common.h | 48 ++ include/linux/mfd/rtsx_pci.h | 794 +++++++++++++++++++++++++ 9 files changed, 2816 insertions(+) create mode 100644 drivers/mfd/rtl8411.c create mode 100644 drivers/mfd/rts5209.c create mode 100644 drivers/mfd/rts5229.c create mode 100644 drivers/mfd/rtsx_pcr.c create mode 100644 drivers/mfd/rtsx_pcr.h create mode 100644 include/linux/mfd/rtsx_common.h create mode 100644 include/linux/mfd/rtsx_pci.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9bba7f78ff36..d0cb4d4bc2cc 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -63,6 +63,15 @@ config MFD_SM501_GPIO lines on the SM501. The platform data is used to supply the base number for the first GPIO line to register. +config MFD_RTSX_PCI + tristate "Support for Realtek PCI-E card reader" + depends on PCI + help + This supports for Realtek PCI-Express card reader including rts5209, + rts5229, rtl8411, etc. Realtek card reader supports access to many + types of memory cards, such as Memory Stick, Memory Stick Pro, + Secure Digital and MultiMediaCard. + config MFD_ASIC3 bool "Support for Compaq ASIC3" depends on GENERIC_HARDIRQS && GPIOLIB && ARM diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 442c17e40741..a4093a44304a 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -9,6 +9,9 @@ obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o +obj-$(CONFIG_MFD_RTSX_PCI) += rtsx_pci.o + obj-$(CONFIG_HTC_EGPIO) += htc-egpio.o obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o obj-$(CONFIG_HTC_I2CPLD) += htc-i2cpld.o diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c new file mode 100644 index 000000000000..89f046ca9e41 --- /dev/null +++ b/drivers/mfd/rtl8411.c @@ -0,0 +1,251 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, SYS_VER, &val); + return val & 0x0F; +} + +static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); +} + +static int rtl8411_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rtl8411_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_ON); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON); +} + +static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err; + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); +} + +static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr) +{ + unsigned int card_exist; + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + card_exist &= CARD_EXIST; + if (!card_exist) { + /* Enable card CD */ + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, CD_ENABLE); + /* Enable card interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00); + return 0; + } + + if (hweight32(card_exist) > 1) { + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + msleep(100); + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + if (card_exist & MS_EXIST) + card_exist = MS_EXIST; + else if (card_exist & SD_EXIST) + card_exist = SD_EXIST; + else + card_exist = 0; + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + + dev_dbg(&(pcr->pci->dev), + "After CD deglitch, card_exist = 0x%x\n", + card_exist); + } + + if (card_exist & MS_EXIST) { + /* Disable SD interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, MS_CD_EN_ONLY); + } else if (card_exist & SD_EXIST) { + /* Disable MS interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, SD_CD_EN_ONLY); + } + + return card_exist; +} + +static const struct pcr_ops rtl8411_pcr_ops = { + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .cd_deglitch = rtl8411_cd_deglitch, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +void rtl8411_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rtl8411_pcr_ops; + + pcr->ic_version = rtl8411_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rtl8411_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rtl8411_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rtl8411_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rtl8411_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c new file mode 100644 index 000000000000..283a4f148084 --- /dev/null +++ b/drivers/mfd/rts5209.c @@ -0,0 +1,223 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + val = rtsx_pci_readb(pcr, 0x1C); + return val & 0x0F; +} + +static void rts5209_init_vendor_cfg(struct rtsx_pcr *pcr) +{ + u32 val; + + rtsx_pci_read_config_dword(pcr, 0x724, &val); + dev_dbg(&(pcr->pci->dev), "Cfg 0x724: 0x%x\n", val); + + if (!(val & 0x80)) { + if (val & 0x08) + pcr->ms_pmos = false; + else + pcr->ms_pmos = true; + } +} + +static int rts5209_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Turn off LED */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_optimize_phy(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966); +} + +static int rts5209_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rts5209_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + u8 pwr_mask, partial_pwr_on, pwr_on; + + pwr_mask = SD_POWER_MASK; + partial_pwr_on = SD_PARTIAL_POWER_ON; + pwr_on = SD_POWER_ON; + + if (pcr->ms_pmos && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + partial_pwr_on = MS_PARTIAL_POWER_ON; + pwr_on = MS_POWER_ON; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask, partial_pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x04); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + return 0; +} + +static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card) +{ + u8 pwr_mask, pwr_off; + + pwr_mask = SD_POWER_MASK; + pwr_off = SD_POWER_OFF; + + if (pcr->ms_pmos && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + pwr_off = MS_POWER_OFF; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0X06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5209_pcr_ops = { + .extra_init_hw = rts5209_extra_init_hw, + .optimize_phy = rts5209_optimize_phy, + .turn_on_led = rts5209_turn_on_led, + .turn_off_led = rts5209_turn_off_led, + .enable_auto_blink = rts5209_enable_auto_blink, + .disable_auto_blink = rts5209_disable_auto_blink, + .card_power_on = rts5209_card_power_on, + .card_power_off = rts5209_card_power_off, + .cd_deglitch = NULL, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5209_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | + EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT; + pcr->num_slots = 2; + pcr->ops = &rts5209_pcr_ops; + + rts5209_init_vendor_cfg(pcr); + + pcr->ic_version = rts5209_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c new file mode 100644 index 000000000000..b9dbab266fda --- /dev/null +++ b/drivers/mfd/rts5229.c @@ -0,0 +1,205 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static int rts5229_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_optimize_phy(struct rtsx_pcr *pcr) +{ + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5229_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5229_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + return 0; +} + +static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0X00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5229_pcr_ops = { + .extra_init_hw = rts5229_extra_init_hw, + .optimize_phy = rts5229_optimize_phy, + .turn_on_led = rts5229_turn_on_led, + .turn_off_led = rts5229_turn_off_led, + .enable_auto_blink = rts5229_enable_auto_blink, + .disable_auto_blink = rts5229_disable_auto_blink, + .card_power_on = rts5229_card_power_on, + .card_power_off = rts5229_card_power_off, + .cd_deglitch = NULL, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5229_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5229_pcr_ops; + + pcr->ic_version = rts5229_get_ic_version(pcr); + if (pcr->ic_version == IC_VER_C) { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2; + } else { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1; + } + pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c new file mode 100644 index 000000000000..56d4377c62c2 --- /dev/null +++ b/drivers/mfd/rtsx_pcr.c @@ -0,0 +1,1251 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static bool msi_en = true; +module_param(msi_en, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(msi_en, "Enable MSI"); + +static DEFINE_IDR(rtsx_pci_idr); +static DEFINE_SPINLOCK(rtsx_pci_lock); + +static struct mfd_cell rtsx_pcr_cells[] = { + [RTSX_SD_CARD] = { + .name = DRV_NAME_RTSX_PCI_SDMMC, + }, + [RTSX_MS_CARD] = { + .name = DRV_NAME_RTSX_PCI_MS, + }, +}; + +static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = { + { PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, rtsx_pci_ids); + +void rtsx_pci_start_run(struct rtsx_pcr *pcr) +{ + /* If pci device removed, don't queue idle work any more */ + if (pcr->remove_pci) + return; + + if (pcr->state != PDEV_STAT_RUN) { + pcr->state = PDEV_STAT_RUN; + if (pcr->ops->enable_auto_blink) + pcr->ops->enable_auto_blink(pcr); + } + + mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200)); +} +EXPORT_SYMBOL_GPL(rtsx_pci_start_run); + +int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data) +{ + int i; + u32 val = HAIMR_WRITE_START; + + val |= (u32)(addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) { + if (data != (u8)val) + return -EIO; + return 0; + } + } + + return -ETIMEDOUT; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_register); + +int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data) +{ + u32 val = HAIMR_READ_START; + int i; + + val |= (u32)(addr & 0x3FFF) << 16; + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) + break; + } + + if (i >= MAX_RW_REG_CNT) + return -ETIMEDOUT; + + if (data) + *data = (u8)(val & 0xFF); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_register); + +int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + int err, i, finished = 0; + u8 tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8)); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register); + +int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + int err, i, finished = 0; + u16 data; + u8 *ptr, tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0); + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + ptr = rtsx_pci_get_cmd_data(pcr); + data = ((u16)ptr[1] << 8) | ptr[0]; + + if (val) + *val = data; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); + +void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + + rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80); + rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80); +} +EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd); + +void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, + u8 cmd_type, u16 reg_addr, u8 mask, u8 data) +{ + unsigned long flags; + u32 val = 0; + u32 *ptr = (u32 *)(pcr->host_cmds_ptr); + + val |= (u32)(cmd_type & 0x03) << 30; + val |= (u32)(reg_addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + spin_lock_irqsave(&pcr->lock, flags); + ptr += pcr->ci; + if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) { + put_unaligned_le32(val, ptr); + ptr++; + pcr->ci++; + } + spin_unlock_irqrestore(&pcr->lock, flags); +} +EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd); + +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr) +{ + u32 val = 1 << 31; + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait); + +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout) +{ + struct completion trans_done; + u32 val = 1 << 31; + long timeleft; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&pcr->lock, flags); + + /* set up data structures for the wakeup system */ + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + /* Wait for TRANS_OK_INT */ + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n", + __func__, __LINE__); + err = -ETIMEDOUT; + goto finish_send_cmd; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) + err = -EINVAL; + else if (pcr->trans_result == TRANS_RESULT_OK) + err = 0; + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +finish_send_cmd: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd); + +static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, + dma_addr_t addr, unsigned int len, int end) +{ + u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; + u64 val; + u8 option = SG_VALID | SG_TRANS_DATA; + + dev_dbg(&(pcr->pci->dev), "DMA addr: 0x%x, Len: 0x%x\n", + (unsigned int)addr, len); + + if (end) + option |= SG_END; + val = ((u64)addr << 32) | ((u64)len << 12) | option; + + put_unaligned_le64(val, ptr); + ptr++; + pcr->sgi++; +} + +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout) +{ + struct completion trans_done; + u8 dir; + int err = 0, i, count; + long timeleft; + unsigned long flags; + struct scatterlist *sg; + enum dma_data_direction dma_dir; + u32 val; + dma_addr_t addr; + unsigned int len; + + dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg); + + /* don't transfer data during abort processing */ + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || (num_sg <= 0)) + return -EINVAL; + + if (read) { + dir = DEVICE_TO_HOST; + dma_dir = DMA_FROM_DEVICE; + } else { + dir = HOST_TO_DEVICE; + dma_dir = DMA_TO_DEVICE; + } + + count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); + if (count < 1) { + dev_err(&(pcr->pci->dev), "scatterlist map failed\n"); + return -EINVAL; + } + dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); + + val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; + pcr->sgi = 0; + for_each_sg(sglist, sg, count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); + } + + spin_lock_irqsave(&pcr->lock, flags); + + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n", + __func__, __LINE__); + err = -ETIMEDOUT; + goto out; + } + + spin_lock_irqsave(&pcr->lock, flags); + + if (pcr->trans_result == TRANS_RESULT_FAIL) + err = -EINVAL; + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + + spin_unlock_irqrestore(&pcr->lock, flags); + +out: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256); + ptr += 256; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf); + +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf); + +static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl) +{ + int err; + + rtsx_pci_init_cmd(pcr); + + while (*tbl & 0xFFFF0000) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + (u16)(*tbl >> 16), 0xFF, (u8)(*tbl)); + tbl++; + } + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + return 0; +} + +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_enable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_enable_tbl; + else + return -EINVAL; + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable); + +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_disable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_disable_tbl; + else + return -EINVAL; + + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); + +static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr) +{ + pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN; + + if (pcr->num_slots > 1) + pcr->bier |= MS_INT_EN; + + /* Enable Bus Interrupt */ + rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier); + + dev_dbg(&(pcr->pci->dev), "RTSX_BIER: 0x%08x\n", pcr->bier); +} + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > (div - 1)) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_4M; + } + + return ssc_depth; +} + +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 N, min_N, max_N, clk_divider; + u8 mcu_cnt, div, max_div; + u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K, + [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + dev_dbg(&(pcr->pci->dev), "Switch card clock to %dMHz\n", card_clock); + + min_N = 80; + max_N = 208; + max_div = CLK_DIV_8; + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + dev_dbg(&(pcr->pci->dev), + "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + N = (u8)(clk - 2); + if ((clk <= 2) || (N > max_N)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is equal or greater than min_N */ + div = CLK_DIV_1; + while ((N < min_N) && (div < max_div)) { + N = (N + 2) * 2 - 2; + div++; + } + dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + dev_dbg(&(pcr->pci->dev), "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, N); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(10); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock); + +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_on) + return pcr->ops->card_power_on(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on); + +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_off) + return pcr->ops->card_power_off(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); + +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr) +{ + unsigned int val; + + val = rtsx_pci_readl(pcr, RTSX_BIPR); + if (pcr->ops->cd_deglitch) + val = pcr->ops->cd_deglitch(pcr); + + return val; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exist); + +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr) +{ + struct completion finish; + + pcr->finish_me = &finish; + init_completion(&finish); + + if (pcr->done) + complete(pcr->done); + + if (!pcr->remove_pci) + rtsx_pci_stop_cmd(pcr); + + wait_for_completion_interruptible_timeout(&finish, + msecs_to_jiffies(2)); + pcr->finish_me = NULL; +} +EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer); + +static void rtsx_pci_card_detect(struct work_struct *work) +{ + struct delayed_work *dwork; + struct rtsx_pcr *pcr; + unsigned long flags; + unsigned int card_detect = 0; + u32 irq_status; + + dwork = to_delayed_work(work); + pcr = container_of(dwork, struct rtsx_pcr, carddet_work); + + dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__); + + spin_lock_irqsave(&pcr->lock, flags); + + irq_status = rtsx_pci_readl(pcr, RTSX_BIPR); + dev_dbg(&(pcr->pci->dev), "irq_status: 0x%08x\n", irq_status); + + if (pcr->card_inserted || pcr->card_removed) { + dev_dbg(&(pcr->pci->dev), + "card_inserted: 0x%x, card_removed: 0x%x\n", + pcr->card_inserted, pcr->card_removed); + + if (pcr->ops->cd_deglitch) + pcr->card_inserted = pcr->ops->cd_deglitch(pcr); + + card_detect = pcr->card_inserted | pcr->card_removed; + pcr->card_inserted = 0; + pcr->card_removed = 0; + } + + spin_unlock_irqrestore(&pcr->lock, flags); + + if (card_detect & SD_EXIST) + pcr->slots[RTSX_SD_CARD].card_event( + pcr->slots[RTSX_SD_CARD].p_dev); + if (card_detect & MS_EXIST) + pcr->slots[RTSX_MS_CARD].card_event( + pcr->slots[RTSX_MS_CARD].p_dev); +} + +static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) +{ + struct rtsx_pcr *pcr = dev_id; + u32 int_reg; + + if (!pcr) + return IRQ_NONE; + + spin_lock(&pcr->lock); + + int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); + /* Clear interrupt flag */ + rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); + if ((int_reg & pcr->bier) == 0) { + spin_unlock(&pcr->lock); + return IRQ_NONE; + } + if (int_reg == 0xFFFFFFFF) { + spin_unlock(&pcr->lock); + return IRQ_HANDLED; + } + + int_reg &= (pcr->bier | 0x7FFFFF); + + if (int_reg & SD_INT) { + if (int_reg & SD_EXIST) { + pcr->card_inserted |= SD_EXIST; + } else { + pcr->card_removed |= SD_EXIST; + pcr->card_inserted &= ~SD_EXIST; + } + } + + if (int_reg & MS_INT) { + if (int_reg & MS_EXIST) { + pcr->card_inserted |= MS_EXIST; + } else { + pcr->card_removed |= MS_EXIST; + pcr->card_inserted &= ~MS_EXIST; + } + } + + if (pcr->card_inserted || pcr->card_removed) + schedule_delayed_work(&pcr->carddet_work, + msecs_to_jiffies(200)); + + if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { + if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { + pcr->trans_result = TRANS_RESULT_FAIL; + if (pcr->done) + complete(pcr->done); + } else if (int_reg & TRANS_OK_INT) { + pcr->trans_result = TRANS_RESULT_OK; + if (pcr->done) + complete(pcr->done); + } + } + + spin_unlock(&pcr->lock); + return IRQ_HANDLED; +} + +static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr) +{ + dev_info(&(pcr->pci->dev), "%s: pcr->msi_en = %d, pci->irq = %d\n", + __func__, pcr->msi_en, pcr->pci->irq); + + if (request_irq(pcr->pci->irq, rtsx_pci_isr, + pcr->msi_en ? 0 : IRQF_SHARED, + DRV_NAME_RTSX_PCI, pcr)) { + dev_err(&(pcr->pci->dev), + "rtsx_sdmmc: unable to grab IRQ %d, disabling device\n", + pcr->pci->irq); + return -1; + } + + pcr->irq = pcr->pci->irq; + pci_intx(pcr->pci, !pcr->msi_en); + + return 0; +} + +static void rtsx_pci_idle_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work); + + dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + pcr->state = PDEV_STAT_IDLE; + + if (pcr->ops->disable_auto_blink) + pcr->ops->disable_auto_blink(pcr); + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + mutex_unlock(&pcr->pcr_mutex); +} + +static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + rtsx_pci_enable_bus_int(pcr); + + /* Power on SSC */ + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (err < 0) + return err; + + /* Wait SSC power stable */ + udelay(200); + + if (pcr->ops->optimize_phy) { + err = pcr->ops->optimize_phy(pcr); + if (err < 0) + return err; + } + + rtsx_pci_init_cmd(pcr); + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00); + /* Disable card clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Reset delink mode */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0); + /* Card driving select */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0x07, DRIVER_TYPE_D); + /* Enable SSC Clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, + 0xFF, SSC_8X_EN | SSC_SEL_4M); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); + /* Clear Link Ready Interrupt */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, + LINK_RDY_INT, LINK_RDY_INT); + /* Enlarge the estimation window of PERST# glitch + * to reduce the chance of invalid card interrupt + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80); + /* Update RC oscillator to 400k + * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1 + * 1: 2M 0: 400k + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00); + /* Set interrupt write clear + * bit 1: U_elbi_if_rd_clr_en + * 1: Enable ELBI interrupt[31:22] & [7:0] flag read clear + * 0: ELBI interrupt flag[31:22] & [7:0] only can be write clear + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* Enable clk_request_n to enable clock power management */ + rtsx_pci_write_config_byte(pcr, 0x81, 1); + /* Enter L1 when host tx idle */ + rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B); + + if (pcr->ops->extra_init_hw) { + err = pcr->ops->extra_init_hw(pcr); + if (err < 0) + return err; + } + + return 0; +} + +static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) +{ + int err; + + spin_lock_init(&pcr->lock); + mutex_init(&pcr->pcr_mutex); + + switch (PCI_PID(pcr)) { + default: + case 0x5209: + rts5209_init_params(pcr); + break; + + case 0x5229: + rts5229_init_params(pcr); + break; + + case 0x5289: + rtl8411_init_params(pcr); + break; + } + + dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n", + PCI_PID(pcr), pcr->ic_version); + + pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot), + GFP_KERNEL); + if (!pcr->slots) + return -ENOMEM; + + pcr->state = PDEV_STAT_IDLE; + err = rtsx_pci_init_hw(pcr); + if (err < 0) { + kfree(pcr->slots); + return err; + } + + return 0; +} + +static int __devinit rtsx_pci_probe(struct pci_dev *pcidev, + const struct pci_device_id *id) +{ + struct rtsx_pcr *pcr; + struct pcr_handle *handle; + u32 base, len; + int ret, i; + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device, + (int)pcidev->revision); + + ret = pci_enable_device(pcidev); + if (ret) + return ret; + + ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI); + if (ret) + goto disable; + + pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); + if (!pcr) { + ret = -ENOMEM; + goto release_pci; + } + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + ret = -ENOMEM; + goto free_pcr; + } + handle->pcr = pcr; + + if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto free_handle; + } + + spin_lock(&rtsx_pci_lock); + ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id); + spin_unlock(&rtsx_pci_lock); + if (ret) + goto free_handle; + + pcr->pci = pcidev; + dev_set_drvdata(&pcidev->dev, handle); + + len = pci_resource_len(pcidev, 0); + base = pci_resource_start(pcidev, 0); + pcr->remap_addr = ioremap_nocache(base, len); + if (!pcr->remap_addr) { + ret = -ENOMEM; + goto free_host; + } + + pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), + RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr), + GFP_KERNEL); + if (pcr->rtsx_resv_buf == NULL) { + ret = -ENXIO; + goto unmap; + } + pcr->host_cmds_ptr = pcr->rtsx_resv_buf; + pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; + pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; + pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; + + pcr->card_inserted = 0; + pcr->card_removed = 0; + INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); + INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work); + + pcr->msi_en = msi_en; + if (pcr->msi_en) { + ret = pci_enable_msi(pcidev); + if (ret < 0) + pcr->msi_en = false; + } + + ret = rtsx_pci_acquire_irq(pcr); + if (ret < 0) + goto free_dma; + + pci_set_master(pcidev); + synchronize_irq(pcr->irq); + + ret = rtsx_pci_init_chip(pcr); + if (ret < 0) + goto disable_irq; + + for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) { + rtsx_pcr_cells[i].platform_data = handle; + rtsx_pcr_cells[i].pdata_size = sizeof(*handle); + } + ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells, + ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL); + if (ret < 0) + goto disable_irq; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + + return 0; + +disable_irq: + free_irq(pcr->irq, (void *)pcr); +free_dma: + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); +unmap: + iounmap(pcr->remap_addr); +free_host: + dev_set_drvdata(&pcidev->dev, NULL); +free_handle: + kfree(handle); +free_pcr: + kfree(pcr); +release_pci: + pci_release_regions(pcidev); +disable: + pci_disable_device(pcidev); + + return ret; +} + +static void __devexit rtsx_pci_remove(struct pci_dev *pcidev) +{ + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + pcr->remove_pci = true; + + cancel_delayed_work(&pcr->carddet_work); + cancel_delayed_work(&pcr->idle_work); + + mfd_remove_devices(&pcidev->dev); + + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + iounmap(pcr->remap_addr); + + dev_set_drvdata(&pcidev->dev, NULL); + pci_release_regions(pcidev); + pci_disable_device(pcidev); + + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); + + kfree(pcr->slots); + kfree(pcr); + kfree(handle); + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device); +} + +#ifdef CONFIG_PM + +static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + int ret = 0; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + cancel_delayed_work(&pcr->carddet_work); + cancel_delayed_work(&pcr->idle_work); + + mutex_lock(&pcr->pcr_mutex); + + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + + rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08); + rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x02); + + pci_save_state(pcidev); + pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); + pci_disable_device(pcidev); + pci_set_power_state(pcidev, pci_choose_state(pcidev, state)); + + mutex_unlock(&pcr->pcr_mutex); + return ret; +} + +static int rtsx_pci_resume(struct pci_dev *pcidev) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + int ret = 0; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + mutex_lock(&pcr->pcr_mutex); + + pci_set_power_state(pcidev, PCI_D0); + pci_restore_state(pcidev); + ret = pci_enable_device(pcidev); + if (ret) + goto out; + pci_set_master(pcidev); + + ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); + if (ret) + goto out; + + ret = rtsx_pci_init_hw(pcr); + if (ret) + goto out; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + +out: + mutex_unlock(&pcr->pcr_mutex); + return ret; +} + +#else /* CONFIG_PM */ + +#define rtsx_pci_suspend NULL +#define rtsx_pci_resume NULL + +#endif /* CONFIG_PM */ + +static struct pci_driver rtsx_pci_driver = { + .name = DRV_NAME_RTSX_PCI, + .id_table = rtsx_pci_ids, + .probe = rtsx_pci_probe, + .remove = __devexit_p(rtsx_pci_remove), + .suspend = rtsx_pci_suspend, + .resume = rtsx_pci_resume, +}; +module_pci_driver(rtsx_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wei WANG "); +MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver"); diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h new file mode 100644 index 000000000000..12462c1df1a9 --- /dev/null +++ b/drivers/mfd/rtsx_pcr.h @@ -0,0 +1,32 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#ifndef __RTSX_PCR_H +#define __RTSX_PCR_H + +#include + +void rts5209_init_params(struct rtsx_pcr *pcr); +void rts5229_init_params(struct rtsx_pcr *pcr); +void rtl8411_init_params(struct rtsx_pcr *pcr); + +#endif diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h new file mode 100644 index 000000000000..a8d393e3066b --- /dev/null +++ b/include/linux/mfd/rtsx_common.h @@ -0,0 +1,48 @@ +/* Driver for Realtek driver-based card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#ifndef __RTSX_COMMON_H +#define __RTSX_COMMON_H + +#define DRV_NAME_RTSX_PCI "rtsx_pci" +#define DRV_NAME_RTSX_PCI_SDMMC "rtsx_pci_sdmmc" +#define DRV_NAME_RTSX_PCI_MS "rtsx_pci_ms" + +#define RTSX_REG_PAIR(addr, val) (((u32)(addr) << 16) | (u8)(val)) + +#define RTSX_SSC_DEPTH_4M 0x01 +#define RTSX_SSC_DEPTH_2M 0x02 +#define RTSX_SSC_DEPTH_1M 0x03 +#define RTSX_SSC_DEPTH_500K 0x04 +#define RTSX_SSC_DEPTH_250K 0x05 + +#define RTSX_SD_CARD 0 +#define RTSX_MS_CARD 1 + +struct platform_device; + +struct rtsx_slot { + struct platform_device *p_dev; + void (*card_event)(struct platform_device *p_dev); +}; + +#endif diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h new file mode 100644 index 000000000000..060b721fcbfb --- /dev/null +++ b/include/linux/mfd/rtsx_pci.h @@ -0,0 +1,794 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#ifndef __RTSX_PCI_H +#define __RTSX_PCI_H + +#include +#include + +#include "rtsx_common.h" + +#define MAX_RW_REG_CNT 1024 + +/* PCI Operation Register Address */ +#define RTSX_HCBAR 0x00 +#define RTSX_HCBCTLR 0x04 +#define RTSX_HDBAR 0x08 +#define RTSX_HDBCTLR 0x0C +#define RTSX_HAIMR 0x10 +#define RTSX_BIPR 0x14 +#define RTSX_BIER 0x18 + +/* Host command buffer control register */ +#define STOP_CMD (0x01 << 28) + +/* Host data buffer control register */ +#define SDMA_MODE 0x00 +#define ADMA_MODE (0x02 << 26) +#define STOP_DMA (0x01 << 28) +#define TRIG_DMA (0x01 << 31) + +/* Host access internal memory register */ +#define HAIMR_TRANS_START (0x01 << 31) +#define HAIMR_READ 0x00 +#define HAIMR_WRITE (0x01 << 30) +#define HAIMR_READ_START (HAIMR_TRANS_START | HAIMR_READ) +#define HAIMR_WRITE_START (HAIMR_TRANS_START | HAIMR_WRITE) +#define HAIMR_TRANS_END (HAIMR_TRANS_START) + +/* Bus interrupt pending register */ +#define CMD_DONE_INT (1 << 31) +#define DATA_DONE_INT (1 << 30) +#define TRANS_OK_INT (1 << 29) +#define TRANS_FAIL_INT (1 << 28) +#define XD_INT (1 << 27) +#define MS_INT (1 << 26) +#define SD_INT (1 << 25) +#define GPIO0_INT (1 << 24) +#define OC_INT (1 << 23) +#define SD_WRITE_PROTECT (1 << 19) +#define XD_EXIST (1 << 18) +#define MS_EXIST (1 << 17) +#define SD_EXIST (1 << 16) +#define DELINK_INT GPIO0_INT +#define MS_OC_INT (1 << 23) +#define SD_OC_INT (1 << 22) + +#define CARD_INT (XD_INT | MS_INT | SD_INT) +#define NEED_COMPLETE_INT (DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT) +#define RTSX_INT (CMD_DONE_INT | NEED_COMPLETE_INT | \ + CARD_INT | GPIO0_INT | OC_INT) + +#define CARD_EXIST (XD_EXIST | MS_EXIST | SD_EXIST) + +/* Bus interrupt enable register */ +#define CMD_DONE_INT_EN (1 << 31) +#define DATA_DONE_INT_EN (1 << 30) +#define TRANS_OK_INT_EN (1 << 29) +#define TRANS_FAIL_INT_EN (1 << 28) +#define XD_INT_EN (1 << 27) +#define MS_INT_EN (1 << 26) +#define SD_INT_EN (1 << 25) +#define GPIO0_INT_EN (1 << 24) +#define OC_INT_EN (1 << 23) +#define DELINK_INT_EN GPIO0_INT_EN +#define MS_OC_INT_EN (1 << 23) +#define SD_OC_INT_EN (1 << 22) + +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + +/* + * macros for easy use + */ +#define rtsx_pci_writel(pcr, reg, value) \ + iowrite32(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readl(pcr, reg) \ + ioread32((pcr)->remap_addr + reg) +#define rtsx_pci_writew(pcr, reg, value) \ + iowrite16(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readw(pcr, reg) \ + ioread16((pcr)->remap_addr + reg) +#define rtsx_pci_writeb(pcr, reg, value) \ + iowrite8(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readb(pcr, reg) \ + ioread8((pcr)->remap_addr + reg) + +#define rtsx_pci_read_config_byte(pcr, where, val) \ + pci_read_config_byte((pcr)->pci, where, val) + +#define rtsx_pci_write_config_byte(pcr, where, val) \ + pci_write_config_byte((pcr)->pci, where, val) + +#define rtsx_pci_read_config_dword(pcr, where, val) \ + pci_read_config_dword((pcr)->pci, where, val) + +#define rtsx_pci_write_config_dword(pcr, where, val) \ + pci_write_config_dword((pcr)->pci, where, val) + +#define STATE_TRANS_NONE 0 +#define STATE_TRANS_CMD 1 +#define STATE_TRANS_BUF 2 +#define STATE_TRANS_SG 3 + +#define TRANS_NOT_READY 0 +#define TRANS_RESULT_OK 1 +#define TRANS_RESULT_FAIL 2 +#define TRANS_NO_DEVICE 3 + +#define RTSX_RESV_BUF_LEN 4096 +#define HOST_CMDS_BUF_LEN 1024 +#define HOST_SG_TBL_BUF_LEN (RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN) +#define HOST_SG_TBL_ITEMS (HOST_SG_TBL_BUF_LEN / 8) +#define MAX_SG_ITEM_LEN 0x80000 + +#define HOST_TO_DEVICE 0 +#define DEVICE_TO_HOST 1 + +#define MAX_PHASE 31 +#define RX_TUNING_CNT 3 + +/* SG descriptor */ +#define SG_INT 0x04 +#define SG_END 0x02 +#define SG_VALID 0x01 + +#define SG_NO_OP 0x00 +#define SG_TRANS_DATA (0x02 << 4) +#define SG_LINK_DESC (0x03 << 4) + +/* SD bank voltage */ +#define SD_IO_3V3 0 +#define SD_IO_1V8 1 + + +/* Card Clock Enable Register */ +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 + +/* Card Select Register */ +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 + +/* Card Output Enable Register */ +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 + +/* CARD_SHARE_MODE */ +#define CARD_SHARE_MASK 0x0F +#define CARD_SHARE_MULTI_LUN 0x00 +#define CARD_SHARE_NORMAL 0x00 +#define CARD_SHARE_48_SD 0x04 +#define CARD_SHARE_48_MS 0x08 +/* CARD_SHARE_MODE for barossa */ +#define CARD_SHARE_BAROSSA_SD 0x01 +#define CARD_SHARE_BAROSSA_MS 0x02 + +/* SD30_DRIVE_SEL */ +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 + +/* FPDCTL */ +#define SSC_POWER_DOWN 0x01 +#define SD_OC_POWER_DOWN 0x02 +#define ALL_POWER_DOWN 0x07 +#define OC_POWER_DOWN 0x06 + +/* CLK_CTL */ +#define CHANGE_CLK 0x01 + +/* LDO_CTL */ +#define BPP_LDO_POWB 0x03 +#define BPP_LDO_ON 0x00 +#define BPP_LDO_SUSPEND 0x02 +#define BPP_LDO_OFF 0x03 + +/* CD_PAD_CTL */ +#define CD_DISABLE_MASK 0x07 +#define MS_CD_DISABLE 0x04 +#define SD_CD_DISABLE 0x02 +#define XD_CD_DISABLE 0x01 +#define CD_DISABLE 0x07 +#define CD_ENABLE 0x00 +#define MS_CD_EN_ONLY 0x03 +#define SD_CD_EN_ONLY 0x05 +#define XD_CD_EN_ONLY 0x06 +#define FORCE_CD_LOW_MASK 0x38 +#define FORCE_CD_XD_LOW 0x08 +#define FORCE_CD_SD_LOW 0x10 +#define FORCE_CD_MS_LOW 0x20 +#define CD_AUTO_DISABLE 0x40 + +/* SD_STAT1 */ +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 + +/* SD_STAT2 */ +#define SD_RSP_80CLK_TIMEOUT 0x01 + +/* SD_BUS_STAT */ +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 + +/* SD_PAD_CTL */ +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 + +/* SD_SAMPLE_POINT_CTL */ +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 + +/* SD_PUSH_POINT_CTL */ +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 + +/* SD_TRANSFER */ +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 +#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +/* SD Transfer Mode definition */ +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F + +/* SD_VPTX_CTL / SD_VPRX_CTL */ +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 + +/* SD_DCMPS_TX_CTL / SD_DCMPS_RX_CTL */ +#define DCMPS_CHANGE 0x80 +#define DCMPS_CHANGE_DONE 0x40 +#define DCMPS_ERROR 0x20 +#define DCMPS_CURRENT_PHASE 0x1F + +/* SD Configure 1 Register */ +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_NOT_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 + +#define SD_CLK_DIVIDE_MASK 0xC0 + +/* SD_CMD_STATE */ +#define SD_CMD_IDLE 0x80 + +/* SD_DATA_STATE */ +#define SD_DATA_IDLE 0x80 + +/* DCM_DRP_CTL */ +#define DCM_RESET 0x08 +#define DCM_LOCKED 0x04 +#define DCM_208M 0x00 +#define DCM_TX 0x01 +#define DCM_RX 0x02 + +/* DCM_DRP_TRIG */ +#define DRP_START 0x80 +#define DRP_DONE 0x40 + +/* DCM_DRP_CFG */ +#define DRP_WRITE 0x80 +#define DRP_READ 0x00 +#define DCM_WRITE_ADDRESS_50 0x50 +#define DCM_WRITE_ADDRESS_51 0x51 +#define DCM_READ_ADDRESS_00 0x00 +#define DCM_READ_ADDRESS_51 0x51 + +/* IRQSTAT0 */ +#define DMA_DONE_INT 0x80 +#define SUSPEND_INT 0x40 +#define LINK_RDY_INT 0x20 +#define LINK_DOWN_INT 0x10 + +/* DMACTL */ +#define DMA_RST 0x80 +#define DMA_BUSY 0x04 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 4) +#define DMA_256 (1 << 4) +#define DMA_512 (2 << 4) +#define DMA_1024 (3 << 4) +#define DMA_PACK_SIZE_MASK 0x30 + +/* SSC_CTL1 */ +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 + +/* SSC_CTL2 */ +#define SSC_DEPTH_MASK 0x07 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_4M 0x01 +#define SSC_DEPTH_2M 0x02 +#define SSC_DEPTH_1M 0x03 +#define SSC_DEPTH_500K 0x04 +#define SSC_DEPTH_250K 0x05 + +/* System Clock Control Register */ +#define CLK_LOW_FREQ 0x01 + +/* System Clock Divider Register */ +#define CLK_DIV_1 0x01 +#define CLK_DIV_2 0x02 +#define CLK_DIV_4 0x03 +#define CLK_DIV_8 0x04 + +/* MS_CFG */ +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 + +/* MS_TRANS_CFG */ +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 + +/* MS_TRANSFER */ +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C + +/* SD Configure 2 Register */ +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_NO_CHECK_WAIT_CRC_TO 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +/* SD/MMC Response Type Definition */ +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 + +/* SD_CONFIURE3 */ +#define SD_RSP_80CLK_TIMEOUT_EN 0x01 + +/* Card Transfer Reset Register */ +#define SPI_STOP 0x01 +#define XD_STOP 0x02 +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SPI_CLR_ERR 0x10 +#define XD_CLR_ERR 0x20 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 + +/* Card Data Source Register */ +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 + +/* Card Power Control Register */ +#define PMOS_STRG_MASK 0x10 +#define PMOS_STRG_800mA 0x10 +#define PMOS_STRG_400mA 0x00 +#define SD_POWER_OFF 0x03 +#define SD_PARTIAL_POWER_ON 0x01 +#define SD_POWER_ON 0x00 +#define SD_POWER_MASK 0x03 +#define MS_POWER_OFF 0x0C +#define MS_PARTIAL_POWER_ON 0x04 +#define MS_POWER_ON 0x00 +#define MS_POWER_MASK 0x0C +#define BPP_POWER_OFF 0x0F +#define BPP_POWER_5_PERCENT_ON 0x0E +#define BPP_POWER_10_PERCENT_ON 0x0C +#define BPP_POWER_15_PERCENT_ON 0x08 +#define BPP_POWER_ON 0x00 +#define BPP_POWER_MASK 0x0F + +/* PWR_GATE_CTRL */ +#define PWR_GATE_EN 0x01 +#define LDO3318_PWR_MASK 0x06 +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x04 +#define LDO_OFF 0x06 + +/* CARD_CLK_SOURCE */ +#define CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) + +#define MS_CFG 0xFD40 +#define MS_TPC 0xFD41 +#define MS_TRANS_CFG 0xFD42 +#define MS_TRANSFER 0xFD43 +#define MS_INT_REG 0xFD44 +#define MS_BYTE_CNT 0xFD45 +#define MS_SECTOR_CNT_L 0xFD46 +#define MS_SECTOR_CNT_H 0xFD47 +#define MS_DBUS_H 0xFD48 + +#define SD_CFG1 0xFDA0 +#define SD_CFG2 0xFDA1 +#define SD_CFG3 0xFDA2 +#define SD_STAT1 0xFDA3 +#define SD_STAT2 0xFDA4 +#define SD_BUS_STAT 0xFDA5 +#define SD_PAD_CTL 0xFDA6 +#define SD_SAMPLE_POINT_CTL 0xFDA7 +#define SD_PUSH_POINT_CTL 0xFDA8 +#define SD_CMD0 0xFDA9 +#define SD_CMD1 0xFDAA +#define SD_CMD2 0xFDAB +#define SD_CMD3 0xFDAC +#define SD_CMD4 0xFDAD +#define SD_CMD5 0xFDAE +#define SD_BYTE_CNT_L 0xFDAF +#define SD_BYTE_CNT_H 0xFDB0 +#define SD_BLOCK_CNT_L 0xFDB1 +#define SD_BLOCK_CNT_H 0xFDB2 +#define SD_TRANSFER 0xFDB3 +#define SD_CMD_STATE 0xFDB5 +#define SD_DATA_STATE 0xFDB6 + +#define SRCTL 0xFC13 + +#define DCM_DRP_CTL 0xFC23 +#define DCM_DRP_TRIG 0xFC24 +#define DCM_DRP_CFG 0xFC25 +#define DCM_DRP_WR_DATA_L 0xFC26 +#define DCM_DRP_WR_DATA_H 0xFC27 +#define DCM_DRP_RD_DATA_L 0xFC28 +#define DCM_DRP_RD_DATA_H 0xFC29 +#define SD_VPCLK0_CTL 0xFC2A +#define SD_VPCLK1_CTL 0xFC2B +#define SD_DCMPS0_CTL 0xFC2C +#define SD_DCMPS1_CTL 0xFC2D +#define SD_VPTX_CTL SD_VPCLK0_CTL +#define SD_VPRX_CTL SD_VPCLK1_CTL +#define SD_DCMPS_TX_CTL SD_DCMPS0_CTL +#define SD_DCMPS_RX_CTL SD_DCMPS1_CTL +#define CARD_CLK_SOURCE 0xFC2E + +#define CARD_PWR_CTL 0xFD50 +#define CARD_CLK_SWITCH 0xFD51 +#define CARD_SHARE_MODE 0xFD52 +#define CARD_DRIVE_SEL 0xFD53 +#define CARD_STOP 0xFD54 +#define CARD_OE 0xFD55 +#define CARD_AUTO_BLINK 0xFD56 +#define CARD_GPIO_DIR 0xFD57 +#define CARD_GPIO 0xFD58 +#define CARD_DATA_SOURCE 0xFD5B +#define CARD_SELECT 0xFD5C +#define SD30_DRIVE_SEL 0xFD5E +#define CARD_CLK_EN 0xFD69 +#define SDIO_CTRL 0xFD6B +#define CD_PAD_CTL 0xFD73 + +#define FPDCTL 0xFC00 +#define PDINFO 0xFC01 + +#define CLK_CTL 0xFC02 +#define CLK_DIV 0xFC03 +#define CLK_SEL 0xFC04 + +#define SSC_DIV_N_0 0xFC0F +#define SSC_DIV_N_1 0xFC10 +#define SSC_CTL1 0xFC11 +#define SSC_CTL2 0xFC12 + +#define RCCTL 0xFC14 + +#define FPGA_PULL_CTL 0xFC1D +#define OLT_LED_CTL 0xFC1E +#define GPIO_CTL 0xFC1F + +#define LDO_CTL 0xFC1E +#define SYS_VER 0xFC32 + +#define CARD_PULL_CTL1 0xFD60 +#define CARD_PULL_CTL2 0xFD61 +#define CARD_PULL_CTL3 0xFD62 +#define CARD_PULL_CTL4 0xFD63 +#define CARD_PULL_CTL5 0xFD64 +#define CARD_PULL_CTL6 0xFD65 + +/* PCI Express Related Registers */ +#define IRQEN0 0xFE20 +#define IRQSTAT0 0xFE21 +#define IRQEN1 0xFE22 +#define IRQSTAT1 0xFE23 +#define TLPRIEN 0xFE24 +#define TLPRISTAT 0xFE25 +#define TLPTIEN 0xFE26 +#define TLPTISTAT 0xFE27 +#define DMATC0 0xFE28 +#define DMATC1 0xFE29 +#define DMATC2 0xFE2A +#define DMATC3 0xFE2B +#define DMACTL 0xFE2C +#define BCTL 0xFE2D +#define RBBC0 0xFE2E +#define RBBC1 0xFE2F +#define RBDAT 0xFE30 +#define RBCTL 0xFE34 +#define CFGADDR0 0xFE35 +#define CFGADDR1 0xFE36 +#define CFGDATA0 0xFE37 +#define CFGDATA1 0xFE38 +#define CFGDATA2 0xFE39 +#define CFGDATA3 0xFE3A +#define CFGRWCTL 0xFE3B +#define PHYRWCTL 0xFE3C +#define PHYDATA0 0xFE3D +#define PHYDATA1 0xFE3E +#define PHYADDR 0xFE3F +#define MSGRXDATA0 0xFE40 +#define MSGRXDATA1 0xFE41 +#define MSGRXDATA2 0xFE42 +#define MSGRXDATA3 0xFE43 +#define MSGTXDATA0 0xFE44 +#define MSGTXDATA1 0xFE45 +#define MSGTXDATA2 0xFE46 +#define MSGTXDATA3 0xFE47 +#define MSGTXCTL 0xFE48 +#define PETXCFG 0xFE49 + +#define CDRESUMECTL 0xFE52 +#define WAKE_SEL_CTL 0xFE54 +#define PME_FORCE_CTL 0xFE56 +#define ASPM_FORCE_CTL 0xFE57 +#define PM_CLK_FORCE_CTL 0xFE58 +#define PERST_GLITCH_WIDTH 0xFE5C +#define CHANGE_LINK_STATE 0xFE5B +#define RESET_LOAD_REG 0xFE5E +#define EFUSE_CONTENT 0xFE5F +#define HOST_SLEEP_STATE 0xFE60 +#define SDIO_CFG 0xFE70 + +#define NFTS_TX_CTRL 0xFE72 + +#define PWR_GATE_CTRL 0xFE75 +#define PWD_SUSPEND_EN 0xFE76 +#define LDO_PWR_SEL 0xFE78 + +#define DUMMY_REG_RESET_0 0xFE90 + +/* Memory mapping */ +#define SRAM_BASE 0xE600 +#define RBUF_BASE 0xF400 +#define PPBUF_BASE1 0xF800 +#define PPBUF_BASE2 0xFA00 +#define IMAGE_FLAG_ADDR0 0xCE80 +#define IMAGE_FLAG_ADDR1 0xCE81 + +#define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) + +struct rtsx_pcr; + +struct pcr_handle { + struct rtsx_pcr *pcr; +}; + +struct pcr_ops { + int (*extra_init_hw)(struct rtsx_pcr *pcr); + int (*optimize_phy)(struct rtsx_pcr *pcr); + int (*turn_on_led)(struct rtsx_pcr *pcr); + int (*turn_off_led)(struct rtsx_pcr *pcr); + int (*enable_auto_blink)(struct rtsx_pcr *pcr); + int (*disable_auto_blink)(struct rtsx_pcr *pcr); + int (*card_power_on)(struct rtsx_pcr *pcr, int card); + int (*card_power_off)(struct rtsx_pcr *pcr, int card); + unsigned int (*cd_deglitch)(struct rtsx_pcr *pcr); +}; + +enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; + +struct rtsx_pcr { + struct pci_dev *pci; + unsigned int id; + + /* pci resources */ + unsigned long addr; + void __iomem *remap_addr; + int irq; + + /* host reserved buffer */ + void *rtsx_resv_buf; + dma_addr_t rtsx_resv_buf_addr; + + void *host_cmds_ptr; + dma_addr_t host_cmds_addr; + int ci; + + void *host_sg_tbl_ptr; + dma_addr_t host_sg_tbl_addr; + int sgi; + + u32 bier; + char trans_result; + + unsigned int card_inserted; + unsigned int card_removed; + + struct delayed_work carddet_work; + struct delayed_work idle_work; + + spinlock_t lock; + struct mutex pcr_mutex; + struct completion *done; + struct completion *finish_me; + + unsigned int cur_clock; + bool ms_pmos; + bool remove_pci; + bool msi_en; + +#define EXTRA_CAPS_SD_SDR50 (1 << 0) +#define EXTRA_CAPS_SD_SDR104 (1 << 1) +#define EXTRA_CAPS_SD_DDR50 (1 << 2) +#define EXTRA_CAPS_MMC_HSDDR (1 << 3) +#define EXTRA_CAPS_MMC_HS200 (1 << 4) +#define EXTRA_CAPS_MMC_8BIT (1 << 5) + u32 extra_caps; + +#define IC_VER_A 0 +#define IC_VER_B 1 +#define IC_VER_C 2 +#define IC_VER_D 3 + u8 ic_version; + + const u32 *sd_pull_ctl_enable_tbl; + const u32 *sd_pull_ctl_disable_tbl; + const u32 *ms_pull_ctl_enable_tbl; + const u32 *ms_pull_ctl_disable_tbl; + + const struct pcr_ops *ops; + enum PDEV_STAT state; + + int num_slots; + struct rtsx_slot *slots; +}; + +#define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) +#define PCI_VID(pcr) ((pcr)->pci->vendor) +#define PCI_PID(pcr) ((pcr)->pci->device) + +void rtsx_pci_start_run(struct rtsx_pcr *pcr); +int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data); +int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data); +int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); +int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); +void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr); +void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, + u8 cmd_type, u16 reg_addr, u8 mask, u8 data); +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout); +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card); +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card); +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr); +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr); + +static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr) +{ + return (u8 *)(pcr->host_cmds_ptr); +} + +#endif -- cgit v1.2.3 From f399002e68e626e7bc443e6fcab1772704cc197f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Mar 2011 14:31:09 +0100 Subject: drbd: distribute former syncer_conf settings to disk, connection, and resource level This commit breaks the API again. Move per-volume former syncer options into disk_conf. Move per-connection former syncer options into net_conf. Renamed the remainign sync_conf to res_opts Syncer settings have been changeable at runtime, so we need to prepare for these settings to be runtime-changeable in their new home as well. Introduce new configuration operations, and share the netlink attribute between "attach" (create new disk) and "disk-opts" (change options). Same for "connect" and "net-opts". Some fields cannot be changed at runtime, however. Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in the generated validation and assignment functions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +- drivers/block/drbd/drbd_main.c | 72 +++-- drivers/block/drbd/drbd_nl.c | 550 ++++++++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 51 ++-- drivers/block/drbd/drbd_state.c | 4 +- drivers/block/drbd/drbd_worker.c | 50 ++-- include/linux/drbd_genl.h | 133 +++++---- include/linux/drbd_limits.h | 2 + include/linux/genl_magic_func.h | 49 ++-- include/linux/genl_magic_struct.h | 18 +- 10 files changed, 572 insertions(+), 367 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d6e7e657e7a4..bc265f3733c6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -860,7 +860,7 @@ struct drbd_md { s32 bm_offset; /* signed relative sector offset to bitmap */ /* u32 al_nr_extents; important for restoring the AL - * is stored into sync_conf.al_extents, which in turn + * is stored into ldev->dc.al_extents, which in turn * gets applied to act_log->nr_elements */ }; @@ -929,6 +929,7 @@ struct drbd_tconn { /* is a resource from the config file */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct res_opts res_opts; struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -945,6 +946,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; void *int_dig_out; void *int_dig_in; void *int_dig_vv; @@ -963,7 +966,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ @@ -1037,8 +1039,6 @@ struct drbd_conf { /* size of out-of-sync range in sectors. */ sector_t ov_last_oos_size; unsigned long ov_left; /* in bits */ - struct crypto_hash *csums_tfm; - struct crypto_hash *verify_tfm; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_sync_param(struct drbd_conf *mdev); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79a0e042252f..bdb12723585e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, return ok; } -int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +int drbd_send_sync_param(struct drbd_conf *mdev) { struct p_rs_param_95 *p; struct socket *sock; @@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) - + strlen(mdev->sync_conf.verify_alg) + 1 + + strlen(mdev->tconn->net_conf->verify_alg) + 1 : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); @@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - p->rate = cpu_to_be32(sc->rate); - p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead); - p->c_delay_target = cpu_to_be32(sc->c_delay_target); - p->c_fill_target = cpu_to_be32(sc->c_fill_target); - p->c_max_rate = cpu_to_be32(sc->c_max_rate); + if (get_ldev(mdev)) { + p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate); + p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead); + p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target); + p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target); + p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate); + put_ldev(mdev); + } else { + p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } if (apv >= 88) - strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg); rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); } else @@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, int bits; /* may we use this feature? */ - if ((mdev->sync_conf.use_rle == 0) || + if ((mdev->tconn->net_conf->use_rle == 0) || (mdev->tconn->agreed_pro_version < 90)) return 0; @@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) static void drbd_set_defaults(struct drbd_conf *mdev) { - /* This way we get a compile error when sync_conf grows, - and we forgot to initialize it here */ - mdev->sync_conf = (struct syncer_conf) { - /* .rate = */ DRBD_RATE_DEF, - /* .after = */ DRBD_AFTER_DEF, - /* .al_extents = */ DRBD_AL_EXTENTS_DEF, - /* .verify_alg = */ {}, 0, - /* .cpu_mask = */ {}, 0, - /* .csums_alg = */ {}, 0, - /* .use_rle = */ 0, - /* .on_no_data = */ DRBD_ON_NO_DATA_DEF, - /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF, - /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF, - /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF, - /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF, - /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF - }; - - /* Have to use that way, because the layout differs between - big endian and little endian */ + /* Beware! The actual layout differs + * between big endian and little endian */ mdev->state = (union drbd_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, @@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + tconn->res_opts = (struct res_opts) { + {}, 0, /* cpu_mask */ + DRBD_ON_NO_DATA_DEF, /* on_no_data */ + }; + mutex_lock(&drbd_cfg_mutex); list_add_tail(&tconn->all_tconn, &drbd_tconns); mutex_unlock(&drbd_cfg_mutex); @@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn) void drbd_free_resources(struct drbd_conf *mdev) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = NULL; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = NULL; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = NULL; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = NULL; crypto_free_hash(mdev->tconn->cram_hmac_tfm); mdev->tconn->cram_hmac_tfm = NULL; crypto_free_hash(mdev->tconn->integrity_w_tfm); @@ -2589,7 +2585,7 @@ struct meta_data_on_disk { u32 md_size_sect; u32 al_offset; /* offset to this block */ u32 al_nr_extents; /* important for restoring the AL */ - /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 la_peer_max_bio_size; /* last peer max_bio_size */ @@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); + bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); spin_lock_irq(&mdev->tconn->req_lock); @@ -2727,8 +2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - if (mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; + if (bdev->dc.al_extents < 7) + bdev->dc.al_extents = 127; err: mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ac0a175e778c..18cd2ed4e8ca 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); @@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); /* .dumpit */ @@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ - err = drbd_cfg_context_from_attrs(NULL, info->attrs); + err = drbd_cfg_context_from_attrs(NULL, info); if (err) goto fail; @@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err) { return err == -ENOMSG ? "required attribute missing" : err == -EOPNOTSUPP ? "unknown mandatory attribute" : + err == -EEXIST ? "can not change invariant setting" : "invalid attribute value"; } @@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { - err = set_role_parms_from_attrs(&parms, info->attrs); + err = set_role_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -static int drbd_check_al_size(struct drbd_conf *mdev) +static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) - mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN)) + dc->al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && - mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + mdev->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; t = mdev->act_log; n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, - mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev) dev_info(DEV, "Suspended AL updates\n"); } +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_conf *mdev; + struct disk_conf *ndc; /* new disk conf */ + int err, fifo_size; + int *rs_plan_s = NULL; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* we also need a disk + * to change the options on */ + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; + } + +/* FIXME freeze IO, cluster wide. + * + * We should make sure no-one uses + * some half-updated struct when we + * assign it later. */ + + ndc = kmalloc(sizeof(*ndc), GFP_KERNEL); + if (!ndc) { + retcode = ERR_NOMEM; + goto fail; + } + + memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc)); + err = disk_conf_from_attrs_for_change(ndc, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + } + + if (!expect(ndc->resync_rate >= 1)) + ndc->resync_rate = 1; + + /* clip to allowed range */ + if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN)) + ndc->al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX)) + ndc->al_extents = DRBD_AL_EXTENTS_MAX; + + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. */ + retcode = drbd_alter_sa(mdev, ndc->resync_after); + if (retcode != NO_ERROR) + goto fail; + + fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (!rs_plan_s) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + retcode = ERR_NOMEM; + goto fail; + } + } + + if (fifo_size != mdev->rs_plan_s.size) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + rs_plan_s = NULL; + } + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev, ndc); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + if (err) { + retcode = ERR_NOMEM; + goto fail; + } + + /* FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + mdev->ldev->dc = *ndc; + + drbd_md_sync(mdev); + + + if (mdev->state.conn >= C_CONNECTED) + drbd_send_sync_param(mdev); + + fail: + put_ldev(mdev); + kfree(ndc); + kfree(rs_plan_s); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; @@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - - err = disk_conf_from_attrs(&nbc->dc, info->attrs); + nbc->dc = (struct disk_conf) { + {}, 0, /* backing_dev */ + {}, 0, /* meta_dev */ + 0, /* meta_dev_idx */ + DRBD_DISK_SIZE_SECT_DEF, /* disk_size */ + DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */ + DRBD_ON_IO_ERROR_DEF, /* on_io_error */ + DRBD_FENCING_DEF, /* fencing */ + DRBD_RATE_DEF, /* resync_rate */ + DRBD_AFTER_DEF, /* resync_after */ + DRBD_AL_EXTENTS_DEF, /* al_extents */ + DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */ + DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */ + DRBD_C_FILL_TARGET_DEF, /* c_fill_target */ + DRBD_C_MAX_RATE_DEF, /* c_max_rate */ + DRBD_C_MIN_RATE_DEF, /* c_min_rate */ + 0, /* no_disk_barrier */ + 0, /* no_disk_flush */ + 0, /* no_disk_drain */ + 0, /* no_md_flush */ + }; + + err = disk_conf_from_attrs(&nbc->dc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev)) { + if (drbd_check_al_size(mdev, &nbc->dc)) { retcode = ERR_NOMEM; goto force_diskless_dec; } @@ -1498,6 +1626,158 @@ out: return 0; } +static bool conn_resync_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T) + return true; + } + return false; +} + +static bool conn_ov_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) + return true; + } + return false; +} + +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct net_conf *new_conf = NULL; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tconn = adm_ctx.tconn; + + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode = ERR_NOMEM; + goto out; + } + + /* we also need a net config + * to change the options on */ + if (!get_net_conf(tconn)) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + conn_reconfig_start(tconn); + + memcpy(new_conf, tconn->net_conf, sizeof(*new_conf)); + err = net_conf_from_attrs_for_change(new_conf, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + /* re-sync running */ + rsr = conn_resync_running(tconn); + if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; + goto fail; + } + + if (!rsr && new_conf->csums_alg[0]) { + csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; + retcode = ERR_CSUMS_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { + retcode = ERR_CSUMS_ALG_ND; + goto fail; + } + } + + /* online verify running */ + ovr = conn_ov_running(tconn); + if (ovr) { + if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; + goto fail; + } + } + + if (!ovr && new_conf->verify_alg[0]) { + verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; + retcode = ERR_VERIFY_ALG; + goto fail; + } + + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { + retcode = ERR_VERIFY_ALG_ND; + goto fail; + } + } + + + /* For now, use struct assignment, not pointer assignment. + * We don't have any means to determine who might still + * keep a local alias into the struct, + * so we cannot just free it and hope for the best :( + * FIXME + * To avoid someone looking at a half-updated struct, we probably + * should have a rw-semaphor on net_conf and disk_conf. + */ + *tconn->net_conf = *new_conf; + + if (!rsr) { + crypto_free_hash(tconn->csums_tfm); + tconn->csums_tfm = csums_tfm; + csums_tfm = NULL; + } + if (!ovr) { + crypto_free_hash(tconn->verify_tfm); + tconn->verify_tfm = verify_tfm; + verify_tfm = NULL; + } + + if (tconn->cstate >= C_WF_REPORT_PARAMS) + drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + + fail: + crypto_free_hash(csums_tfm); + crypto_free_hash(verify_tfm); + kfree(new_conf); + put_net_conf(tconn); + conn_reconfig_done(tconn); + out: + drbd_adm_finish(info, retcode); + return 0; +} + int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { char hmac_name[CRYPTO_MAX_ALG_NAME]; @@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) } /* allocation not in the IO path, cqueue thread context */ - new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; - new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - - err = net_conf_from_attrs(new_conf, info->attrs); + *new_conf = (struct net_conf) { + {}, 0, /* my_addr */ + {}, 0, /* peer_addr */ + {}, 0, /* shared_secret */ + {}, 0, /* cram_hmac_alg */ + {}, 0, /* integrity_alg */ + {}, 0, /* verify_alg */ + {}, 0, /* csums_alg */ + DRBD_PROTOCOL_DEF, /* wire_protocol */ + DRBD_CONNECT_INT_DEF, /* try_connect_int */ + DRBD_TIMEOUT_DEF, /* timeout */ + DRBD_PING_INT_DEF, /* ping_int */ + DRBD_PING_TIMEO_DEF, /* ping_timeo */ + DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */ + DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */ + DRBD_KO_COUNT_DEF, /* ko_count */ + DRBD_MAX_BUFFERS_DEF, /* max_buffers */ + DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */ + DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */ + DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */ + DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */ + DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */ + DRBD_RR_CONFLICT_DEF, /* rr_conflict */ + DRBD_ON_CONGESTION_DEF, /* on_congestion */ + DRBD_CONG_FILL_DEF, /* cong_fill */ + DRBD_CONG_EXTENTS_DEF, /* cong_extents */ + 0, /* two_primaries */ + 0, /* want_lose */ + 0, /* no_cork */ + 0, /* always_asbp */ + 0, /* dry_run */ + 0, /* use_rle */ + }; + + err = net_conf_from_attrs(new_conf, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) tconn = adm_ctx.tconn; memset(&parms, 0, sizeof(parms)); if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { - err = disconnect_parms_from_attrs(&parms, info->attrs); + err = disconnect_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) memset(&rs, 0, sizeof(struct resize_parms)); if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { - err = resize_parms_from_attrs(&rs, info->attrs); + err = resize_parms_from_attrs(&rs, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } -int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { - struct drbd_conf *mdev; enum drbd_ret_code retcode; - int err; - int ovr; /* online verify running */ - int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - struct syncer_conf sc; cpumask_var_t new_cpu_mask; + struct drbd_tconn *tconn; int *rs_plan_s = NULL; - int fifo_size; + struct res_opts sc; + int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto fail; - mdev = adm_ctx.mdev; + tconn = adm_ctx.tconn; if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { retcode = ERR_NOMEM; @@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info) if (((struct drbd_genlmsghdr*)info->userhdr)->flags & DRBD_GENL_F_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct syncer_conf)); - sc.rate = DRBD_RATE_DEF; - sc.after = DRBD_AFTER_DEF; - sc.al_extents = DRBD_AL_EXTENTS_DEF; + memset(&sc, 0, sizeof(struct res_opts)); sc.on_no_data = DRBD_ON_NO_DATA_DEF; - sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF; - sc.c_fill_target = DRBD_C_FILL_TARGET_DEF; - sc.c_max_rate = DRBD_C_MAX_RATE_DEF; - sc.c_min_rate = DRBD_C_MIN_RATE_DEF; } else - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + sc = tconn->res_opts; - err = syncer_conf_from_attrs(&sc, info->attrs); + err = res_opts_from_attrs(&sc, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - /* re-sync running */ - rsr = ( mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T ); - - if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { - retcode = ERR_CSUMS_RESYNC_RUNNING; - goto fail; - } - - if (!rsr && sc.csums_alg[0]) { - csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } - } - - /* online verify running */ - ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); - - if (ovr) { - if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; - } - } - - if (!ovr && sc.verify_alg[0]) { - verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } - - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; - goto fail; - } - } - /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } } - if (!expect(sc.rate >= 1)) - sc.rate = 1; - - /* clip to allowed range */ - if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) - sc.al_extents = DRBD_AL_EXTENTS_MIN; - if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) - sc.al_extents = DRBD_AL_EXTENTS_MAX; - - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. */ - retcode = drbd_alter_sa(mdev, sc.after); - if (retcode != NO_ERROR) - goto fail; - - fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; - if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { - rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); - if (!rs_plan_s) { - dev_err(DEV, "kmalloc of fifo_buffer failed"); - retcode = ERR_NOMEM; - goto fail; - } - } - - /* ok, assign the rest of it as well. - * lock against receive_SyncParam() */ - spin_lock(&mdev->peer_seq_lock); - mdev->sync_conf = sc; - - if (!rsr) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - csums_tfm = NULL; - } - - if (!ovr) { - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - verify_tfm = NULL; - } - - if (fifo_size != mdev->rs_plan_s.size) { - kfree(mdev->rs_plan_s.values); - mdev->rs_plan_s.values = rs_plan_s; - mdev->rs_plan_s.size = fifo_size; - mdev->rs_planed = 0; - rs_plan_s = NULL; - } - - spin_unlock(&mdev->peer_seq_lock); - if (get_ldev(mdev)) { - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + tconn->res_opts = sc; - put_ldev(mdev); - drbd_md_sync(mdev); - - if (err) { - retcode = ERR_NOMEM; - goto fail; - } - } - - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev, &sc); - - if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev->tconn); - mdev->tconn->receiver.reset_cpu_mask = 1; - mdev->tconn->asender.reset_cpu_mask = 1; - mdev->tconn->worker.reset_cpu_mask = 1; + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; } - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: kfree(rs_plan_s); free_cpumask_var(new_cpu_mask); - crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); drbd_adm_finish(info, retcode); return 0; @@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) goto nla_put_failure; + if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + goto nla_put_failure; + if (got_ldev) if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive)) goto nla_put_failure; @@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive)) goto nla_put_failure; - if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive)) - goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; @@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) /* resume from last known position, if possible */ struct start_ov_parms parms = { .ov_start_sector = mdev->ov_start_sector }; - int err = start_ov_parms_from_attrs(&parms, info->attrs); + int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); @@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) mdev = adm_ctx.mdev; memset(&args, 0, sizeof(args)); if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { - err = new_c_uuid_parms_from_attrs(&args, info->attrs); + err = new_c_uuid_parms_from_attrs(&args, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 50c52712715e..c8c826b2444f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data) &mdev->tconn->cstate_mutex : &mdev->own_state_mutex; - ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); + ok &= drbd_send_sync_param(mdev); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); ok &= drbd_send_state(mdev); @@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) int throttle = 0; /* feature disabled? */ - if (mdev->sync_conf.c_min_rate == 0) + if (mdev->ldev->dc.c_min_rate == 0) return 0; spin_lock_irq(&mdev->al_lock); @@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) db = mdev->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > mdev->sync_conf.c_min_rate) + if (dbdt > mdev->ldev->dc.c_min_rate) throttle = 1; } return throttle; @@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; - mdev->sync_conf.rate = be32_to_cpu(p->rate); + if (get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + put_ldev(mdev); + } if (apv >= 88) { if (apv == 88) { @@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, } } - if (apv > 94) { - mdev->sync_conf.rate = be32_to_cpu(p->rate); - mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); - mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); - mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); - mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + if (apv > 94 && get_ldev(mdev)) { + mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate); + mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target); + mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target); + mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate); - fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); if (!rs_plan_s) { dev_err(DEV, "kmalloc of fifo_buffer failed"); + put_ldev(mdev); goto disconnect; } } + put_ldev(mdev); } spin_lock(&mdev->peer_seq_lock); /* lock against drbd_nl_syncer_conf() */ if (verify_tfm) { - strcpy(mdev->sync_conf.verify_alg, p->verify_alg); - mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; + strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg); + mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(mdev->sync_conf.csums_alg, p->csums_alg); - mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; + strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg); + mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } if (fifo_size != mdev->rs_plan_s.size) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 11685658659e..77fad527fb1d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_CONNECTED_OUTDATES; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) + (mdev->tconn->net_conf->verify_alg[0] == 0)) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && @@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a730520e468e..005876b32f74 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); /* Free peer_req and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on @@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev) spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ - steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ - want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; + want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps; } else { /* normal path */ - want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : - sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); + want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target : + sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10); } correction = want - mdev->rs_in_flight - mdev->rs_planed; @@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev) if (req_sect < 0) req_sect = 0; - max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; + max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ; if (req_sect > max_sect) req_sect = max_sect; @@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev) static int drbd_rs_number_requests(struct drbd_conf *mdev) { int number; - if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ + if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { - mdev->c_sync_rate = mdev->sync_conf.rate; + mdev->c_sync_rate = mdev->ldev->dc.resync_rate; number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); } @@ -619,7 +619,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->csums_tfm && mdev->rs_total) { + if (mdev->tconn->csums_tfm && mdev->rs_total) { const unsigned long s = mdev->rs_same_csum; const unsigned long t = mdev->rs_total; const int ratio = @@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) /* quick hack to try to avoid a race against reconfiguration. * a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + if (mdev->tconn->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); D_ASSERT(digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } @@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { ok = 0; /* terminate the connection in case the allocation failed */ @@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) } if (likely(!(peer_req->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) struct drbd_conf *odev = mdev; while (1) { - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return 1; - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor) return ERR_SYNC_AFTER_CYCLE; /* dependency chain ends here, no cycles. */ - if (odev->sync_conf.after == -1) + if (odev->ldev->dc.resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(odev->ldev->dc.resync_after); } } @@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na) write_lock_irq(&global_state_lock); retcode = sync_after_error(mdev, na); if (retcode == NO_ERROR) { - mdev->sync_conf.after = na; + mdev->ldev->dc.resync_after = na; do { changes = _drbd_pause_after(mdev); changes |= _drbd_resume_next(mdev); @@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi) struct drbd_work *w = NULL; struct drbd_conf *mdev; LIST_HEAD(work_list); - int minor, intr = 0; + int vnr, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi) spin_unlock_irq(&tconn->data.work.q_lock); drbd_thread_stop(&tconn->receiver); - idr_for_each_entry(&tconn->volumes, mdev, minor) { + idr_for_each_entry(&tconn->volumes, mdev, vnr) { D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the exiting receiver. */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a07d69279b1a..938e8560a833 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __u64_field(1, GENLA_F_MANDATORY, disk_size) - __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) - __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) - __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) - __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) + __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) + __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) __u32_field(7, GENLA_F_MANDATORY, fencing) - __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(11, GENLA_F_MANDATORY, no_md_flush) - __flg_field(12, GENLA_F_MANDATORY, use_bmbv) + + __u32_field(8, GENLA_F_MANDATORY, resync_rate) + __u32_field(9, GENLA_F_MANDATORY, resync_after) + __u32_field(10, GENLA_F_MANDATORY, al_extents) + __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(12, GENLA_F_MANDATORY, c_delay_target) + __u32_field(13, GENLA_F_MANDATORY, c_fill_target) + __u32_field(14, GENLA_F_MANDATORY, c_max_rate) + __u32_field(15, GENLA_F_MANDATORY, c_min_rate) + + __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(19, GENLA_F_MANDATORY, no_md_flush) + ) -GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, - __u32_field(1, GENLA_F_MANDATORY, rate) - __u32_field(2, GENLA_F_MANDATORY, after) - __u32_field(3, GENLA_F_MANDATORY, al_extents) - __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) - __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __flg_field(7, GENLA_F_MANDATORY, use_rle) - __u32_field(8, GENLA_F_MANDATORY, on_no_data) - __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(10, GENLA_F_MANDATORY, c_delay_target) - __u32_field(11, GENLA_F_MANDATORY, c_fill_target) - __u32_field(12, GENLA_F_MANDATORY, c_max_rate) - __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field(2, GENLA_F_MANDATORY, on_no_data) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) + __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) + __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_REQUIRED, my_addr, 128) - __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) - __u32_field(6, GENLA_F_REQUIRED, wire_protocol) - __u32_field(7, GENLA_F_MANDATORY, try_connect_int) - __u32_field(8, GENLA_F_MANDATORY, timeout) - __u32_field(9, GENLA_F_MANDATORY, ping_int) - __u32_field(10, GENLA_F_MANDATORY, ping_timeo) - __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(13, GENLA_F_MANDATORY, ko_count) - __u32_field(14, GENLA_F_MANDATORY, max_buffers) - __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) - __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(20, GENLA_F_MANDATORY, rr_conflict) - __u32_field(21, GENLA_F_MANDATORY, on_congestion) - __u32_field(22, GENLA_F_MANDATORY, cong_fill) - __u32_field(23, GENLA_F_MANDATORY, cong_extents) - __flg_field(24, GENLA_F_MANDATORY, two_primaries) - __flg_field(25, GENLA_F_MANDATORY, want_lose) - __flg_field(26, GENLA_F_MANDATORY, no_cork) - __flg_field(27, GENLA_F_MANDATORY, always_asbp) - __flg_field(28, GENLA_F_MANDATORY, dry_run) + __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field(8, GENLA_F_MANDATORY, wire_protocol) + __u32_field(9, GENLA_F_MANDATORY, try_connect_int) + __u32_field(10, GENLA_F_MANDATORY, timeout) + __u32_field(11, GENLA_F_MANDATORY, ping_int) + __u32_field(12, GENLA_F_MANDATORY, ping_timeo) + __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(15, GENLA_F_MANDATORY, ko_count) + __u32_field(16, GENLA_F_MANDATORY, max_buffers) + __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(22, GENLA_F_MANDATORY, rr_conflict) + __u32_field(23, GENLA_F_MANDATORY, on_congestion) + __u32_field(24, GENLA_F_MANDATORY, cong_fill) + __u32_field(25, GENLA_F_MANDATORY, cong_extents) + __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(28, GENLA_F_MANDATORY, no_cork) + __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) + __flg_field(31, GENLA_F_MANDATORY, use_rle) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, @@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on replication links */ -GENL_op(DRBD_ADM_SYNCER, 9, - GENL_doit(drbd_adm_syncer), +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) ) GENL_op( @@ -284,16 +290,28 @@ GENL_op( GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) ) +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) - /* operates on minors */ GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) ) +GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) +) + GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), @@ -301,7 +319,6 @@ GENL_op( GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) ) - /* operates on all volumes within a resource */ GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 22920a8af4e2..659a8eb38830 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -166,5 +166,7 @@ #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_PROTOCOL_DEF DRBD_PROT_C + #undef RANGE #endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index c8c67239f616..e458282a3728 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128]; #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ - /* static, potentially unused */ \ -int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +/* *_from_attrs functions are static, but potentially unused */ \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ { \ const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ - struct nlattr *tla = tb[tag_number]; \ + struct nlattr *tla = info->attrs[tag_number]; \ struct nlattr **ntb = nested_attr_tb; \ struct nlattr *nla; \ int err; \ @@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ \ s_fields \ return 0; \ -} +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ -#undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \ nla = ntb[__nla_type(attr_nr)]; \ if (nla) { \ - if (s) \ - s->name = __get(nla); \ - DPRINT_FIELD("<<", nla_type, name, s, nla); \ + if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ } else if ((attr_flag) & GENLA_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + /* validate_nla() already checked nla_len <= maxlen appropriately. */ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ - nla = ntb[__nla_type(attr_nr)]; \ - if (nla) { \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ __get(s->name, nla, maxlen); \ - DPRINT_ARRAY("<<", nla_type, name, s, nla); \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ - pr_info("<< missing attr: %s\n", #name); \ - return -ENOMSG; \ - } \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 745ebfd6c7e5..9a605b9ee834 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -59,12 +59,20 @@ enum { GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, - /* This will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb, to exclude "sensitive" - * information from broadcasts, or on unpriviledged get requests. - * This is useful because genetlink multicast groups can be listened in - * on by anyone. */ + /* Below will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb resp. _from_attrs */ + + /* To exclude "sensitive" information from broadcasts, or on + * unpriviledged get requests. This is useful because genetlink + * multicast groups can be listened in on by anyone. */ GENLA_F_SENSITIVE = 1 << 16, + + /* INVARIAN options cannot be changed at runtime. + * Useful to share an attribute policy and struct definition, + * between some "create" and "change" commands, + * but disallow certain fields to be changed online. + */ + GENLA_F_INVARIANT = 1 << 17, }; #define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) -- cgit v1.2.3 From cb703454a283d8dd5599e928eeea30367ca18874 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 24 Mar 2011 11:03:07 +0100 Subject: drbd: Converted drbd_try_outdate_peer() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_nl.c | 144 ++++++++++++++++++------------------- drivers/block/drbd/drbd_receiver.c | 7 +- drivers/block/drbd/drbd_state.c | 85 ++++++++++++++++++---- drivers/block/drbd/drbd_state.h | 5 ++ include/linux/drbd.h | 3 +- 6 files changed, 152 insertions(+), 96 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c1eb4462096e..74637cc1461c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1472,8 +1472,8 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force); -extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); -extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev); +extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); +extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f1ec727f7df5..85290a9beb6d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -366,116 +366,122 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd) return ret; } -enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) { + enum drbd_fencing_p fp = FP_NOT_AVAIL; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (get_ldev_if_state(mdev, D_CONSISTENT)) { + fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing); + put_ldev(mdev); + } + } + + return fp; +} + +bool conn_try_outdate_peer(struct drbd_tconn *tconn) +{ + union drbd_state mask = { }; + union drbd_state val = { }; + enum drbd_fencing_p fp; char *ex_to_string; int r; - enum drbd_disk_state nps; - enum drbd_fencing_p fp; - D_ASSERT(mdev->state.pdsk == D_UNKNOWN); + if (tconn->cstate >= C_WF_REPORT_PARAMS) { + conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n"); + return false; + } - if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } else { - dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - nps = mdev->state.pdsk; + fp = highest_fencing_policy(tconn); + switch (fp) { + case FP_NOT_AVAIL: + conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n"); goto out; + case FP_DONT_CARE: + return true; + default: ; } - r = drbd_khelper(mdev, "fence-peer"); + r = conn_khelper(tconn, "fence-peer"); switch ((r>>8) & 0xff) { case 3: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; - nps = D_INCONSISTENT; + mask.pdsk = D_MASK; + val.pdsk = D_INCONSISTENT; break; case 4: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; case 5: /* peer was down */ - if (mdev->state.disk == D_UP_TO_DATE) { + if (conn_highest_disk(tconn) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; } else { ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; - nps = mdev->state.pdsk; } break; case 6: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. */ ex_to_string = "peer is active"; - dev_warn(DEV, "Peer is primary, outdating myself.\n"); - nps = D_UNKNOWN; - _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); + conn_warn(tconn, "Peer is primary, outdating myself.\n"); + mask.disk = D_MASK; + val.disk = D_OUTDATED; break; case 7: if (fp != FP_STONITH) - dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); + conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; default: /* The script is broken ... */ - nps = D_UNKNOWN; - dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); - return nps; + conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return false; /* Eventually leave IO frozen */ } - dev_info(DEV, "fence-peer helper returned %d (%s)\n", - (r>>8) & 0xff, ex_to_string); + conn_info(tconn, "fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); -out: - if (mdev->state.susp_fen && nps >= D_UNKNOWN) { - /* The handler was not successful... unfreeze here, the - state engine can not unfreeze... */ - _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE); - } + out: - return nps; + /* Not using + conn_request_state(tconn, mask, val, CS_VERBOSE); + here, because we might were able to re-establish the connection in the + meantime. */ + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate < C_WF_REPORT_PARAMS) + _conn_request_state(tconn, mask, val, CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + + return conn_highest_pdsk(tconn) <= D_OUTDATED; } static int _try_outdate_peer_async(void *data) { - struct drbd_conf *mdev = (struct drbd_conf *)data; - enum drbd_disk_state nps; - union drbd_state ns; + struct drbd_tconn *tconn = (struct drbd_tconn *)data; - nps = drbd_try_outdate_peer(mdev); - - /* Not using - drbd_request_state(mdev, NS(pdsk, nps)); - here, because we might were able to re-establish the connection - in the meantime. This can only partially be solved in the state's - engine is_valid_state() and is_valid_state_transition() - functions. - - nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. - pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, - therefore we have to have the pre state change check here. - */ - spin_lock_irq(&mdev->tconn->req_lock); - ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS) { - ns.pdsk = nps; - _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->tconn->req_lock); + conn_try_outdate_peer(tconn); return 0; } -void drbd_try_outdate_peer_async(struct drbd_conf *mdev) +void conn_try_outdate_peer_async(struct drbd_tconn *tconn) { struct task_struct *opa; - opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev)); + opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); if (IS_ERR(opa)) - dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); + conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); } enum drbd_state_rv @@ -486,7 +492,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) int try = 0; int forced = 0; union drbd_state mask, val; - enum drbd_disk_state nps; if (new_role == R_PRIMARY) request_ping(mdev->tconn); /* Detect a dead peer ASAP */ @@ -519,32 +524,23 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (rv == SS_NO_UP_TO_DATE_DISK && mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - nps = drbd_try_outdate_peer(mdev); - if (nps == D_OUTDATED || nps == D_INCONSISTENT) { + if (conn_try_outdate_peer(mdev->tconn)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } - - val.pdsk = nps; - mask.pdsk = D_MASK; - continue; } if (rv == SS_NOTHING_TO_DO) goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { - nps = drbd_try_outdate_peer(mdev); - - if (force && nps > D_OUTDATED) { + if (!conn_try_outdate_peer(mdev->tconn) && force) { dev_warn(DEV, "Forced into split brain situation!\n"); - nps = D_OUTDATED; - } - - mask.pdsk = D_MASK; - val.pdsk = nps; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; + } continue; } if (rv == SS_TWO_PRIMARIES) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1fd871bc889e..91aa49f478e8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4030,9 +4030,11 @@ static void drbd_disconnect(struct drbd_tconn *tconn) drbd_free_sock(tconn); idr_for_each(&tconn->volumes, drbd_disconnected, tconn); - conn_info(tconn, "Connection closed\n"); + if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) + conn_try_outdate_peer_async(tconn); + spin_lock_irq(&tconn->req_lock); oc = tconn->cstate; if (oc >= C_UNCONNECTED) @@ -4109,9 +4111,6 @@ static int drbd_disconnected(int vnr, void *p, void *data) put_ldev(mdev); } - if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) - drbd_try_outdate_peer_async(mdev); - /* serialize with bitmap writeout triggered by the state change, * if any. */ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 52ff1c7379e9..b4f668db3296 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -61,6 +61,73 @@ bool conn_all_vols_unconf(struct drbd_tconn *tconn) return true; } +/* Unfortunately the states where not correctly ordered, when + they where defined. therefore can not use max_t() here. */ +static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_PRIMARY || role2 == R_PRIMARY) + return R_PRIMARY; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_UNKNOWN; +} +static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) + return R_UNKNOWN; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_PRIMARY; +} + +enum drbd_role conn_highest_role(struct drbd_tconn *tconn) +{ + enum drbd_role role = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + role = max_role(role, mdev->state.role); + + return role; +} + +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) +{ + enum drbd_role peer = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + peer = max_role(peer, mdev->state.peer); + + return peer; +} + +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + + return ds; +} + +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); + + return ds; +} + /** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -329,18 +396,6 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio dev_info(DEV, "%s\n", pb); } -static bool vol_has_primary_peer(struct drbd_tconn *tconn) -{ - struct drbd_conf *mdev; - int vnr; - - idr_for_each_entry(&tconn->volumes, mdev, vnr) { - if (mdev->state.peer == R_PRIMARY) - return true; - } - return false; -} - /** * is_valid_state() - Returns an SS_ error code if ns is not valid * @mdev: DRBD device. @@ -364,7 +419,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) { if (ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - else if (vol_has_primary_peer(mdev->tconn)) + else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) rv = SS_O_VOL_PEER_PRI; } put_net_conf(mdev->tconn); @@ -1390,8 +1445,8 @@ static int _set_state_itr_fn(int vnr, void *p, void *data) rv = __drbd_set_state(mdev, ns, flags, NULL); - ms.role = max_t(enum drbd_role, mdev->state.role, ms.role); - ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer); + ms.role = max_role(ns.role, ms.role); + ms.peer = max_role(ns.peer, ms.peer); ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); params->ms = ms; diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 55df0728bc88..394a1998acd9 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -110,4 +110,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev, return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); } +enum drbd_role conn_highest_role(struct drbd_tconn *tconn); +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); + #endif diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9cdb888607ae..60d308819096 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -65,7 +65,8 @@ enum drbd_io_error_p { }; enum drbd_fencing_p { - FP_DONT_CARE, + FP_NOT_AVAIL = -1, /* Not a policy */ + FP_DONT_CARE = 0, FP_RESOURCE, FP_STONITH }; -- cgit v1.2.3 From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 30 Mar 2011 16:00:17 +0200 Subject: drbd: Introduce protocol version 100 headers The 8 byte header finally becomes too small. With the protocol 100 header we have 16 bit for the volume number, proper 32 bit for the data length, and 32 bit for further extensions in the future. Previous versions of drbd are using version 80 headers for all packets short enough for protocol 80. They support both header versions in worker context, but only version 80 headers in asynchronous context. For backwards compatibility, continue to use version 80 headers for short packets before protocol version 100. From protocol version 100 on, use the same header version for all packets. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++++++ drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++++++++++++------ drivers/block/drbd/drbd_nl.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 2 -- include/linux/drbd_limits.h | 2 ++ 7 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6d55bb75a081..bf1aad683387 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -307,6 +307,14 @@ struct p_header95 { u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ } __packed; +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + extern unsigned int drbd_header_size(struct drbd_tconn *tconn); /* these defines must not be changed without changing the protocol version */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b9dcc50135c4..5d9112cefcd7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) */ unsigned int drbd_header_size(struct drbd_tconn *tconn) { - BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); - BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); - return sizeof(struct p_header80); + if (tconn->agreed_pro_version >= 100) { + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); + return sizeof(struct p_header100); + } else { + BUILD_BUG_ON(sizeof(struct p_header80) != + sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); + } } static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) @@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, return sizeof(struct p_header95); } -static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer, - enum drbd_packet cmd, int size) +static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, + int size, int vnr) +{ + h->magic = cpu_to_be32(DRBD_MAGIC_100); + h->volume = cpu_to_be16(vnr); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); + h->pad = 0; + return sizeof(struct p_header100); +} + +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, + void *buffer, enum drbd_packet cmd, int size) { - if (tconn->agreed_pro_version >= 95) + if (tconn->agreed_pro_version >= 100) + return prepare_header100(buffer, cmd, size, vnr); + else if (tconn->agreed_pro_version >= 95 && + size > DRBD_MAX_SIZE_H80_PACKET) return prepare_header95(buffer, cmd, size); else return prepare_header80(buffer, cmd, size); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d9bb1a5c756a..0f52b88719c8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) retcode = ERR_INVALID_REQUEST; goto out; } - /* FIXME we need a define here */ - if (adm_ctx.volume >= 256) { + if (adm_ctx.volume > DRBD_VOLUME_MAX) { drbd_msg_put_info("requested volume id out of range"); retcode = ERR_INVALID_REQUEST; goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7e0ab2246fb6..311b95453cb7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i { unsigned int header_size = drbd_header_size(tconn); - if (header_size == sizeof(struct p_header95) && - *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + if (header_size == sizeof(struct p_header100) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { + struct p_header100 *h = header; + if (h->pad != 0) { + conn_err(tconn, "Header padding is not zero\n"); + return -EINVAL; + } + pi->vnr = be16_to_cpu(h->volume); + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + } else if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { struct p_header95 *h = header; pi->cmd = be16_to_cpu(h->command); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 60d308819096..fe8d6ba31bcb 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -341,6 +341,7 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +#define DRBD_MAGIC_100 0x8620ec20 /* how I came up with this magic? * base64 decode "actlog==" ;) */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 938e8560a833..10144d546a66 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - /* currently only 256 volumes per group, - * but maybe we still change that */ __u32_field(1, GENLA_F_MANDATORY, ctx_volume) __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) ) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 659a8eb38830..7f5149bef70e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -19,6 +19,8 @@ #define DRBD_MINOR_COUNT_MAX 256 #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_VOLUME_MAX 65535 + #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 -- cgit v1.2.3 From b032b6fa3528d6eed972db32257cb316a66e0dac Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Apr 2011 18:16:10 -0700 Subject: drbd: Allow online change of replication protocol only with agreed_pv >= 100 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 +++++ include/linux/drbd.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 40de384aade6..d4b29fd603f4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1671,6 +1671,11 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) struct drbd_conf *mdev; int i; + if (tconn->net_conf && tconn->agreed_pro_version < 100 && + tconn->cstate == C_WF_REPORT_PARAMS && + new_conf->wire_protocol != tconn->net_conf->wire_protocol) + return ERR_NEED_APV_100; + if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) return ERR_NOT_PROTO_C; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index fe8d6ba31bcb..6c7c85d8fc41 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -160,6 +160,7 @@ enum drbd_ret_code { ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, + ERR_NEED_APV_100 = 163, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3 From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 12:27:11 +0200 Subject: drbd: Remove left-over unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7f5149bef70e..bcebb016fda3 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,5 +170,4 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C -#undef RANGE #endif -- cgit v1.2.3 From b966b5dd8e17e6c105ca55533fd412de5d5b429e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 May 2011 14:56:09 +0200 Subject: drbd: Generate the drbd_set_*_defaults() functions from drbd_genl.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 86 +++++------------------------------- include/linux/drbd_genl.h | 91 +++++++++++++++++++-------------------- include/linux/genl_magic_func.h | 26 +++++++++++ include/linux/genl_magic_struct.h | 8 ++++ 6 files changed, 91 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 44f77265d2b0..8655fcb82028 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1390,7 +1390,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); /* drbd_nl.c */ -extern void drbd_set_res_opts_default(struct res_opts *r); +extern void drbd_set_res_opts_defaults(struct res_opts *r); extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 427e959e4869..4ae3e7a99d7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2488,7 +2488,7 @@ struct drbd_tconn *conn_create(const char *name) drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); - drbd_set_res_opts_default(&tconn->res_opts); + drbd_set_res_opts_defaults(&tconn->res_opts); down_write(&drbd_cfg_rwsem); kref_init(&tconn->kref); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7320ac00f0fb..f5732cf46c2f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1090,77 +1090,6 @@ static bool should_set_defaults(struct genl_info *info) return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); } -/* Maybe we should we generate these functions - * from the drbd_genl.h magic as well? - * That way we would not "accidentally forget" to add defaults here. */ - -#define RESET_ARRAY_FIELD(field) do { \ - memset(field, 0, sizeof(field)); \ - field ## _len = 0; \ -} while (0) -void drbd_set_res_opts_default(struct res_opts *r) -{ - RESET_ARRAY_FIELD(r->cpu_mask); - r->on_no_data = DRBD_ON_NO_DATA_DEF; -} - -static void drbd_set_net_conf_defaults(struct net_conf *nc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with disconnect/reconnect */ - RESET_ARRAY_FIELD(nc->shared_secret); - - RESET_ARRAY_FIELD(nc->cram_hmac_alg); - RESET_ARRAY_FIELD(nc->integrity_alg); - RESET_ARRAY_FIELD(nc->verify_alg); - RESET_ARRAY_FIELD(nc->csums_alg); -#undef RESET_ARRAY_FIELD - - nc->wire_protocol = DRBD_PROTOCOL_DEF; - nc->try_connect_int = DRBD_CONNECT_INT_DEF; - nc->timeout = DRBD_TIMEOUT_DEF; - nc->ping_int = DRBD_PING_INT_DEF; - nc->ping_timeo = DRBD_PING_TIMEO_DEF; - nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - nc->ko_count = DRBD_KO_COUNT_DEF; - nc->max_buffers = DRBD_MAX_BUFFERS_DEF; - nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - nc->rr_conflict = DRBD_RR_CONFLICT_DEF; - nc->on_congestion = DRBD_ON_CONGESTION_DEF; - nc->cong_fill = DRBD_CONG_FILL_DEF; - nc->cong_extents = DRBD_CONG_EXTENTS_DEF; - nc->two_primaries = 0; - nc->no_cork = 0; - nc->always_asbp = 0; - nc->use_rle = 0; -} - -static void drbd_set_disk_conf_defaults(struct disk_conf *dc) -{ - /* Do NOT (re)set those fields marked as GENLA_F_INVARIANT - * in drbd_genl.h, they can only be change with detach/reattach */ - dc->on_io_error = DRBD_ON_IO_ERROR_DEF; - dc->fencing = DRBD_FENCING_DEF; - dc->resync_rate = DRBD_RATE_DEF; - dc->resync_after = DRBD_AFTER_DEF; - dc->al_extents = DRBD_AL_EXTENTS_DEF; - dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF; - dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF; - dc->c_fill_target = DRBD_C_FILL_TARGET_DEF; - dc->c_max_rate = DRBD_C_MAX_RATE_DEF; - dc->c_min_rate = DRBD_C_MIN_RATE_DEF; - dc->no_disk_barrier = 0; - dc->no_disk_flush = 0; - dc->no_disk_drain = 0; - dc->no_md_flush = 0; -} - - int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -1198,7 +1127,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf)); if (should_set_defaults(info)) - drbd_set_disk_conf_defaults(new_disk_conf); + set_disk_conf_defaults(new_disk_conf); err = disk_conf_from_attrs_for_change(new_disk_conf, info); if (err) { @@ -1315,7 +1244,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_disk_conf_defaults(&nbc->dc); + set_disk_conf_defaults(&nbc->dc); err = disk_conf_from_attrs(&nbc->dc, info); if (err) { @@ -1911,7 +1840,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) *new_conf = *old_conf; if (should_set_defaults(info)) - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs_for_change(new_conf, info); if (err) { @@ -2029,7 +1958,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - drbd_set_net_conf_defaults(new_conf); + set_net_conf_defaults(new_conf); err = net_conf_from_attrs(new_conf, info); if (err) { @@ -2301,6 +2230,11 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) return 0; } +void drbd_set_res_opts_defaults(struct res_opts *r) +{ + return set_res_opts_defaults(r); +} + int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2325,7 +2259,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) res_opts = tconn->res_opts; if (should_set_defaults(info)) - drbd_set_res_opts_default(&res_opts); + set_res_opts_defaults(&res_opts); err = res_opts_from_attrs(&res_opts, info); if (err) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 10144d546a66..549800668cb9 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -110,63 +110,62 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, * but it won't propagate through the stack */ __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) - __u32_field(6, GENLA_F_MANDATORY, on_io_error) - __u32_field(7, GENLA_F_MANDATORY, fencing) - - __u32_field(8, GENLA_F_MANDATORY, resync_rate) - __u32_field(9, GENLA_F_MANDATORY, resync_after) - __u32_field(10, GENLA_F_MANDATORY, al_extents) - __u32_field(11, GENLA_F_MANDATORY, c_plan_ahead) - __u32_field(12, GENLA_F_MANDATORY, c_delay_target) - __u32_field(13, GENLA_F_MANDATORY, c_fill_target) - __u32_field(14, GENLA_F_MANDATORY, c_max_rate) - __u32_field(15, GENLA_F_MANDATORY, c_min_rate) - - __flg_field(16, GENLA_F_MANDATORY, no_disk_barrier) - __flg_field(17, GENLA_F_MANDATORY, no_disk_flush) - __flg_field(18, GENLA_F_MANDATORY, no_disk_drain) - __flg_field(19, GENLA_F_MANDATORY, no_md_flush) - + __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) + __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field(2, GENLA_F_MANDATORY, on_no_data) + __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field(8, GENLA_F_MANDATORY, wire_protocol) - __u32_field(9, GENLA_F_MANDATORY, try_connect_int) - __u32_field(10, GENLA_F_MANDATORY, timeout) - __u32_field(11, GENLA_F_MANDATORY, ping_int) - __u32_field(12, GENLA_F_MANDATORY, ping_timeo) - __u32_field(13, GENLA_F_MANDATORY, sndbuf_size) - __u32_field(14, GENLA_F_MANDATORY, rcvbuf_size) - __u32_field(15, GENLA_F_MANDATORY, ko_count) - __u32_field(16, GENLA_F_MANDATORY, max_buffers) - __u32_field(17, GENLA_F_MANDATORY, max_epoch_size) - __u32_field(18, GENLA_F_MANDATORY, unplug_watermark) - __u32_field(19, GENLA_F_MANDATORY, after_sb_0p) - __u32_field(20, GENLA_F_MANDATORY, after_sb_1p) - __u32_field(21, GENLA_F_MANDATORY, after_sb_2p) - __u32_field(22, GENLA_F_MANDATORY, rr_conflict) - __u32_field(23, GENLA_F_MANDATORY, on_congestion) - __u32_field(24, GENLA_F_MANDATORY, cong_fill) - __u32_field(25, GENLA_F_MANDATORY, cong_extents) - __flg_field(26, GENLA_F_MANDATORY, two_primaries) + __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field(28, GENLA_F_MANDATORY, no_cork) - __flg_field(29, GENLA_F_MANDATORY, always_asbp) + __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field(31, GENLA_F_MANDATORY, use_rle) + __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e458282a3728..e908f1c50355 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -427,6 +427,32 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #include GENL_MAGIC_INCLUDE_FILE + +/* Functions for initializing structs to default values. */ + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#undef __u32_field_def +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __flg_field_def +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __str_field_def +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + memset(x->name, 0, sizeof(x->name)); \ + x->name ## _len = 0; +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ +static void set_ ## s_name ## _defaults(struct s_name *x) { \ +s_fields \ +} + +#include GENL_MAGIC_INCLUDE_FILE + #endif /* __KERNEL__ */ /* }}}1 */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 9a605b9ee834..f2c7cc7831df 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -107,6 +107,14 @@ enum { __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, NLA_PUT) +/* fields with default values */ +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + __flg_field(attr_nr, attr_flag, name) +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + __u32_field(attr_nr, attr_flag, name) +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + __str_field(attr_nr, attr_flag, name, maxlen) + #define __nla_put_flag(skb, attrtype, value) \ do { \ if (value) \ -- cgit v1.2.3 From 563e4cf25ec804eb02cd30a41baa2fcc6c06679b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 4 May 2011 10:33:52 +0200 Subject: drbd: Introduce __s32_field in the genetlink macro magic ...and drop explicit typecasts (int)meta_dev_idx < 0. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_struct.h | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a1854e3aa15e..b8ea4807c981 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1253,7 +1253,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -1289,7 +1289,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) */ bdev = blkdev_get_by_path(nbc->dc.meta_dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - ((int)nbc->dc.meta_dev_idx < 0) ? + (nbc->dc.meta_dev_idx < 0) ? (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, @@ -1325,7 +1325,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; } - if ((int)nbc->dc.meta_dev_idx < 0) { + if (nbc->dc.meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); @@ -1356,7 +1356,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if ((int)nbc->dc.meta_dev_idx >= 0) + if (nbc->dc.meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 549800668cb9..f143e3c0f33b 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -102,7 +102,7 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f2c7cc7831df..ddbdd0a24476 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -97,6 +97,9 @@ enum { #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ nla_get_u32, NLA_PUT_U32) +#define __s32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ + nla_get_u32, NLA_PUT_U32) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ nla_get_u64, NLA_PUT_U64) -- cgit v1.2.3 From a5d8e1fb9d22851de89bbf52db6b11c56b895dd4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 16:06:51 +0200 Subject: drbd: Convert boolean flags on netlink from NLA_FLAG to NLA_U8 Flags of type NLA_FLAG are either present or absent, but do not have a value by themselves. Use type NLA_U8 for our boolean flags instead, and use the value to determine if the flag should be on or off. On the drbdsetup command line, all those flags have an optional yes/no argument which defaults to yes. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ddbdd0a24476..b1ddbb5bd725 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -86,8 +86,8 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ - __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ - nla_get_flag, __nla_put_flag) + __field(attr_nr, attr_flag, name, NLA_U8, char, \ + nla_get_u8, NLA_PUT_U8) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ nla_get_u8, NLA_PUT_U8) @@ -118,12 +118,6 @@ enum { #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) -#define __nla_put_flag(skb, attrtype, value) \ - do { \ - if (value) \ - NLA_PUT_FLAG(skb, attrtype); \ - } while (0) - #define GENL_op_init(args...) args #define GENL_doit(handler) \ .doit = handler, \ -- cgit v1.2.3 From 66b2f6b9c59c5e7003e13281dfe72e174f93988c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-disk-flushes into disk-flushes={yes|no} Change the --no-disk-flushes drbdsetup command line option as well as the no_disk_flush netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 83d39859a9fe..e7a6eeae94e2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1179,7 +1179,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) rcu_read_lock(); dc = rcu_dereference(mdev->ldev->disk_conf); - if (wo == WO_bdev_flush && dc->no_disk_flush) + if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; if (wo == WO_drain_io && dc->no_disk_drain) wo = WO_none; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index f143e3c0f33b..945c4dd3470c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,8 +122,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, no_disk_barrier, 0) - __flg_field_def(17, GENLA_F_MANDATORY, no_disk_flush, 0) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) -- cgit v1.2.3 From d0c980e236243cd03aa2291243587ac1ba3c2b04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-disk-drain into disk-drain={yes|no} Change the --no-disk-drain drbdsetup command line option as well as the no_disk_drain netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e7a6eeae94e2..5d1bdda8ec9f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1181,7 +1181,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) if (wo == WO_bdev_flush && !dc->disk_flushes) wo = WO_drain_io; - if (wo == WO_drain_io && dc->no_disk_drain) + if (wo == WO_drain_io && !dc->disk_drain) wo = WO_none; rcu_read_unlock(); mdev->write_ordering = wo; diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 945c4dd3470c..30ad6600b444 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -124,7 +124,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, no_disk_drain, 0) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) ) -- cgit v1.2.3 From e544046ab842ab93c275a6fc4e043c1cb637076d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-md-flushes into md-flushes={yes|no} Change the --no-md-flushes drbdsetup command line option as well as the no_md_flush netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9af097416e26..4a946a877bde 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1449,10 +1449,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ - if (new_disk_conf->no_md_flush) - set_bit(MD_NO_FUA, &mdev->flags); - else + if (new_disk_conf->md_flushes) clear_bit(MD_NO_FUA, &mdev->flags); + else + set_bit(MD_NO_FUA, &mdev->flags); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 30ad6600b444..53518fc23154 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -125,7 +125,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, no_md_flush, 0) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, -- cgit v1.2.3 From bb77d34ecc6fe6cdc3f4f0841a516695c2eacc04 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 4 May 2011 15:25:35 +0200 Subject: drbd: Turn no-tcp-cork into tcp-cork={yes|no} Change the --no-tcp-cork drbdsetup command line option as well as the no_cork netlink packet. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5d1bdda8ec9f..b4858bb78940 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5040,7 +5040,7 @@ int drbd_asender(struct drbd_thread *thi) int expect = header_size; bool ping_timeout_active = false; struct net_conf *nc; - int ping_timeo, no_cork, ping_int; + int ping_timeo, tcp_cork, ping_int; current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ @@ -5051,7 +5051,7 @@ int drbd_asender(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); ping_timeo = nc->ping_timeo; - no_cork = nc->no_cork; + tcp_cork = nc->tcp_cork; ping_int = nc->ping_int; rcu_read_unlock(); @@ -5066,14 +5066,14 @@ int drbd_asender(struct drbd_thread *thi) /* TODO: conditionally cork; it may hurt latency if we cork without much to send */ - if (!no_cork) + if (tcp_cork) drbd_tcp_cork(tconn->meta.socket); if (tconn_finish_peer_reqs(tconn)) { conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); goto reconnect; } /* but unconditionally uncork unless disabled */ - if (!no_cork) + if (tcp_cork) drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e37c42d5dd6e..78c3de49eff6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1694,7 +1694,7 @@ int drbd_worker(struct drbd_thread *thi) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - cork = nc ? !nc->no_cork : 0; + cork = nc ? nc->tcp_cork : 0; rcu_read_unlock(); if (tconn->data.socket && cork) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 53518fc23154..6632d10f1ee1 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -162,7 +162,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, no_cork, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) -- cgit v1.2.3 From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 17:50:57 +0200 Subject: drbd: Also define the default values of boolean flags in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 16 ++++++++-------- include/linux/drbd_limits.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6632d10f1ee1..02647dc8c67c 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, 1) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, 1) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, 1) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, 1) + __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, 0) + __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) - __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, 1) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, 0) + __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, 0) + __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index bcebb016fda3..3d3e2d5125cb 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -170,4 +170,14 @@ #define DRBD_PROTOCOL_DEF DRBD_PROT_C +#define DRBD_DISK_BARRIER_DEF 0 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 0 + #endif -- cgit v1.2.3 From 6139f60dc192e2c5478c1126d1aff7905dc0a98a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 May 2011 20:00:02 +0200 Subject: drbd: Rename the want_lose field/flag to discard_my_data This is what it is called in config files and on the command line as well. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- include/linux/drbd_genl.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 56b190c65546..fa36757ffc4a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -413,7 +413,7 @@ struct p_rs_param_95 { } __packed; enum drbd_conn_flags { - CF_WANT_LOSE = 1, + CF_DISCARD_MY_DATA = 1, CF_DRY_RUN = 2, }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 22c2b4c881da..86c8bc5ac603 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -943,8 +943,8 @@ int __drbd_send_protocol(struct drbd_tconn *tconn) p->after_sb_2p = cpu_to_be32(nc->after_sb_2p); p->two_primaries = cpu_to_be32(nc->two_primaries); cf = 0; - if (nc->want_lose) - cf |= CF_WANT_LOSE; + if (nc->discard_my_data) + cf |= CF_DISCARD_MY_DATA; if (nc->dry_run) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); @@ -988,7 +988,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); rcu_read_lock(); - uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0; + uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0; rcu_read_unlock(); uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 195428ee6052..9a82306adf92 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -606,7 +606,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) mutex_lock(&mdev->tconn->conf_update); nc = mdev->tconn->net_conf; if (nc) - nc->want_lose = 0; /* without copy; single bit op is atomic */ + nc->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); set_disk_ro(mdev->vdisk, false); @@ -1738,7 +1738,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) return ERR_STONITH_AND_PROT_A; } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) + if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) return ERR_DISCARD; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index aa42967398e3..e4e8f8a408d1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2908,9 +2908,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -3009,7 +3009,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { struct p_protocol *p = pi->data; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries, cf; + int p_discard_my_data, p_two_primaries, cf; struct net_conf *nc; p_proto = be32_to_cpu(p->protocol); @@ -3018,7 +3018,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) p_after_sb_2p = be32_to_cpu(p->after_sb_2p); p_two_primaries = be32_to_cpu(p->two_primaries); cf = be32_to_cpu(p->conn_flags); - p_want_lose = cf & CF_WANT_LOSE; + p_discard_my_data = cf & CF_DISCARD_MY_DATA; if (tconn->agreed_pro_version >= 87) { char integrity_alg[SHARED_SECRET_MAX]; @@ -3075,8 +3075,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) goto disconnect_rcu_unlock; } - if (p_want_lose && nc->want_lose) { - conn_err(tconn, "both sides have the 'want_lose' flag set\n"); + if (p_discard_my_data && nc->discard_my_data) { + conn_err(tconn, "both sides have the 'discard_my_data' flag set\n"); goto disconnect_rcu_unlock; } @@ -3806,7 +3806,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } mutex_lock(&mdev->tconn->conf_update); - mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */ + mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */ mutex_unlock(&mdev->tconn->conf_update); drbd_md_sync(mdev); /* update connected indicator, la_size, ... */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 02647dc8c67c..6aece551d87e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -161,7 +161,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, want_lose) + __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) -- cgit v1.2.3 From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:29:52 +0200 Subject: drbd: Refer to resync-rate consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa36757ffc4a..8026adacd3d2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -389,21 +389,21 @@ struct p_barrier_ack { } __packed; struct p_rs_param { - u32 rate; + u32 resync_rate; /* Since protocol version 88 and higher. */ char verify_alg[0]; } __packed; struct p_rs_param_89 { - u32 rate; + u32 resync_rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __packed; struct p_rs_param_95 { - u32 rate; + u32 resync_rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; u32 c_plan_ahead; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 86c8bc5ac603..26d7763d5255 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev) if (get_ldev(mdev)) { dc = rcu_dereference(mdev->ldev->disk_conf); - p->rate = cpu_to_be32(dc->resync_rate); + p->resync_rate = cpu_to_be32(dc->resync_rate); p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); p->c_delay_target = cpu_to_be32(dc->c_delay_target); p->c_fill_target = cpu_to_be32(dc->c_fill_target); p->c_max_rate = cpu_to_be32(dc->c_max_rate); put_ldev(mdev); } else { - p->rate = cpu_to_be32(DRBD_RATE_DEF); + p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e4e8f8a408d1..684f79542727 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) old_disk_conf = mdev->ldev->disk_conf; *new_disk_conf = *old_disk_conf; - new_disk_conf->resync_rate = be32_to_cpu(p->rate); + new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); } if (apv >= 88) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 6aece551d87e..778708d92939 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RATE_DEF) + __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3d3e2d5125cb..48339ae69d50 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -98,10 +98,11 @@ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ /* less than 7 would hit performance unnecessarily. * 919 slots context information per transaction, -- cgit v1.2.3 From 69ef82dea4c34e4a0541fc3f415b0fef70fe12b0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:34:35 +0200 Subject: drbd: Refer to connect-int consistently throughout the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 12 ++++++------ include/linux/drbd_genl.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 684f79542727..7deade196a33 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -617,7 +617,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) struct sockaddr_in6 peer_in6; struct net_conf *nc; int err, peer_addr_len, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; int disconnect_on_error = 1; rcu_read_lock(); @@ -629,7 +629,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); memcpy(&src_in6, nc->my_addr, my_addr_len); @@ -653,7 +653,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = try_connect_int * HZ; + sock->sk->sk_sndtimeo = connect_int * HZ; drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); /* explicitly bind to the configured IP as source IP @@ -702,7 +702,7 @@ out: static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { int timeo, err, my_addr_len; - int sndbuf_size, rcvbuf_size, try_connect_int; + int sndbuf_size, rcvbuf_size, connect_int; struct socket *s_estab = NULL, *s_listen; struct sockaddr_in6 my_addr; struct net_conf *nc; @@ -717,7 +717,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; - try_connect_int = nc->try_connect_int; + connect_int = nc->connect_int; my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); memcpy(&my_addr, nc->my_addr, my_addr_len); @@ -731,7 +731,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) goto out; } - timeo = try_connect_int * HZ; + timeo = connect_int * HZ; timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 778708d92939..67c816c0fc28 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -143,7 +143,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, try_connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) -- cgit v1.2.3 From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 11:15:34 +0200 Subject: drbd: Fix the upper limit of resync-after The 32-bit resync_after netlink field takes a device minor number as parameter, which is no longer limited to 255. We cannot statically verify which device numbers are valid, so set the ummer limit to the highest possible signed 32-bit integer. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 26 +++++++++++++------------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 2 +- include/linux/drbd_limits.h | 7 ++++--- 6 files changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8026adacd3d2..e16722840767 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor); -void drbd_sync_after_changed(struct drbd_conf *mdev); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_resync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9a82306adf92..74c27f1507f3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } write_lock_irq(&global_state_lock); - retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); if (retcode == NO_ERROR) { rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); - drbd_sync_after_changed(mdev); + drbd_resync_after_changed(mdev); } write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec8f4245ef9a..6410c55831e0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel); /* About the global_state_lock Each state transition on an device holds a read lock. In case we have - to evaluate the sync after dependencies, we grab a write lock, because + to evaluate the resync after dependencies, we grab a write lock, because we need stable states on all devices for that. */ rwlock_t global_state_lock; @@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; - int ra; + int resync_after; while (1) { if (!odev->ldev) return 1; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); - if (ra == -1) + if (resync_after == -1) return 1; - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); if (!expect(odev)) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && @@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev) } /* caller must hold global_state_lock */ -enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor) +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; - int ra; + int resync_after; if (o_minor == -1) return NO_ERROR; if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) - return ERR_SYNC_AFTER; + return ERR_RESYNC_AFTER; /* check for loops */ odev = minor_to_mdev(o_minor); while (1) { if (odev == mdev) - return ERR_SYNC_AFTER_CYCLE; + return ERR_RESYNC_AFTER_CYCLE; rcu_read_lock(); - ra = rcu_dereference(odev->ldev->disk_conf)->resync_after; + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; rcu_read_unlock(); /* dependency chain ends here, no cycles. */ - if (ra == -1) + if (resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(ra); + odev = minor_to_mdev(resync_after); } } /* caller must hold global_state_lock */ -void drbd_sync_after_changed(struct drbd_conf *mdev) +void drbd_resync_after_changed(struct drbd_conf *mdev) { int changes; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 6c7c85d8fc41..05063e6db81f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -130,8 +130,8 @@ enum drbd_ret_code { ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 67c816c0fc28..a59466f7f661 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_AFTER_DEF) + __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 48339ae69d50..c4a8f0fef7b2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,9 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_MIN -1 +#define DRBD_RESYNC_AFTER_MAX (1<<30) +#define DRBD_RESYNC_AFTER_DEF -1 +#define DRBD_RESYNC_AFTER_SCALE '1' /* } */ -- cgit v1.2.3 From 3a45abd577727d2268e190d372600f8652883453 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 12 May 2011 12:02:54 +0200 Subject: drbd: Convert resync-after into a signed netlink field Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 3 +++ include/linux/genl_magic_struct.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index a59466f7f661..7b174a093a8d 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) + __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index e908f1c50355..94e839aafae3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -437,6 +437,9 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; +#undef __s32_field_def +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; #undef __flg_field_def #define __flg_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index b1ddbb5bd725..0fca21fd1af5 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -115,6 +115,8 @@ enum { __flg_field(attr_nr, attr_flag, name) #define __u32_field_def(attr_nr, attr_flag, name, default) \ __u32_field(attr_nr, attr_flag, name) +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + __s32_field(attr_nr, attr_flag, name) #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ __str_field(attr_nr, attr_flag, name, maxlen) -- cgit v1.2.3 From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 May 2011 14:44:55 +0200 Subject: drbd: Rename DISK_SIZE_SECT -> DISK_SIZE We don't have the units in constant names in other places, either. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index c4a8f0fef7b2..8f8bbea545e0 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,9 +125,10 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ #define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE -- cgit v1.2.3 From dcb20d1a8e7d9602e52a9b673ae4d7f746d2cbb2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 16 May 2011 14:30:24 +0200 Subject: drbd: Refuse to change network options online when... * the peer does not speak protocol_version 100 and the user wants to change one of: - wire_protocol - two_primaries - integrity_alg * the user wants to remove the allow_two_primaries flag when there are two primaries Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 22 ++++++++++++++++++---- include/linux/drbd.h | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 74c27f1507f3..133a6724657d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1722,10 +1722,24 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n struct drbd_conf *mdev; int i; - if (old_conf && tconn->agreed_pro_version < 100 && - tconn->cstate == C_WF_REPORT_PARAMS && - new_conf->wire_protocol != old_conf->wire_protocol) - return ERR_NEED_APV_100; + if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) { + if (new_conf->wire_protocol != old_conf->wire_protocol) + return ERR_NEED_APV_100; + + if (new_conf->two_primaries != old_conf->two_primaries) + return ERR_NEED_APV_100; + + if (!new_conf->integrity_alg != !old_conf->integrity_alg) + return ERR_NEED_APV_100; + + if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg)) + return ERR_NEED_APV_100; + } + + if (!new_conf->two_primaries && + conn_highest_role(tconn) == R_PRIMARY && + conn_highest_peer(tconn) == R_PRIMARY) + return ERR_NEED_ALLOW_TWO_PRI; if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 05063e6db81f..679e81123229 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -161,6 +161,7 @@ enum drbd_ret_code { ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, + ERR_NEED_ALLOW_TWO_PRI = 164, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3 From 309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 May 2011 01:24:14 +0200 Subject: drbd: Use more generic constant names These constants are useful for the same purpose in more than one place. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 8f8bbea545e0..3627f760966e 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -113,10 +113,10 @@ #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 -#define DRBD_RESYNC_AFTER_MIN -1 -#define DRBD_RESYNC_AFTER_MAX (1<<30) -#define DRBD_RESYNC_AFTER_DEF -1 -#define DRBD_RESYNC_AFTER_SCALE '1' +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ -- cgit v1.2.3 From 509100e6012db92f4af3796436b450447c6c8268 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 17 May 2011 13:29:46 +0200 Subject: drbd: Output signed / unsigned netlink fields correctly Note: All input values are still treated as signed; unsigned long long values are still broken. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 23 +++++++++++------ include/linux/genl_magic_struct.h | 52 ++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 94e839aafae3..2ae16126c6a4 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -78,12 +78,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ + __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ - __get, __put) \ + __get, __put, __is_signed) \ [__nla_type(attr_nr)] = { .type = nla_type, \ .len = maxlen - (nla_type == NLA_NUL_STRING) }, @@ -241,7 +242,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name = __get(nla); \ @@ -249,7 +251,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ /* validate_nla() already checked nla_len <= maxlen appropriately. */ #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ __assign(attr_nr, attr_flag, name, nla_type, type, \ if (s) \ s->name ## _len = \ @@ -410,14 +413,16 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ @@ -431,9 +436,11 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ /* Functions for initializing structs to default values. */ #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) #undef __u32_field_def #define __u32_field_def(attr_nr, attr_flag, name, default) \ x->name = default; diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 0fca21fd1af5..ba911da84d9f 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -87,28 +87,28 @@ enum { /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8) + nla_get_u8, NLA_PUT_U8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16) + nla_get_u16, NLA_PUT_U16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32) + nla_get_u32, NLA_PUT_U32, true) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64) + nla_get_u64, NLA_PUT_U64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT) + nla_strlcpy, NLA_PUT, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ - nla_memcpy, NLA_PUT) + nla_memcpy, NLA_PUT, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ @@ -174,11 +174,13 @@ enum { \ }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, \ + __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, \ + maxlen, __get, __put, __is_signed) \ T_ ## name = (__u16)(attr_nr | attr_flag), #include GENL_MAGIC_INCLUDE_FILE @@ -238,11 +240,13 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ } #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ case attr_nr: #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ case attr_nr: #include GENL_MAGIC_INCLUDE_FILE @@ -260,16 +264,36 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ struct s_name { s_fields }; #undef __field -#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ type name; #undef __array -#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ type name[maxlen]; \ __u32 name ## _len; #include GENL_MAGIC_INCLUDE_FILE +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#include GENL_MAGIC_INCLUDE_FILE + /* }}}1 */ #endif /* GENL_MAGIC_STRUCT_H */ /* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From bbbef2d5ad8203f67274196dac90754ae106a463 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 18 May 2011 16:48:16 +0200 Subject: drbd: Remove unused GENLA_F_MAY_IGNORE flag Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_struct.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index ba911da84d9f..f3c3425ac30f 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -40,11 +40,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * yet implemented features, if newer userland tries to use them even though * the genl_family version clearly indicates they are not available. * - * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. - * To be used for API extensions for things that have sane defaults, - * so newer userland can still talk to older kernel, knowing it will - * silently ignore these attributes if not yet known. - * * NOTE: These flags overload * NLA_F_NESTED (1 << 15) * NLA_F_NET_BYTEORDER (1 << 14) @@ -55,7 +50,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * See also: nla_type() */ enum { - GENLA_F_MAY_IGNORE = 0, GENLA_F_MANDATORY = 1 << 14, GENLA_F_REQUIRED = 1 << 15, -- cgit v1.2.3 From 5f9359201b5cf1d94fe0e0c47fcba38cfc921863 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 19 May 2011 17:39:28 +0200 Subject: drbd: Make drbd's use of netlink attribute flags less confusing Make it more clear in the flag names which flags are internal to drbd, and which are not. The check for mandatory attributes is the only extension visible at the netlink layer. Attributes with this flag set would look like unknown attributes to some kernel versions. The netlink layer would ignore them and also skip consistency checks on the attribute type and legth. To avoid this, we check for mandatory attributes first, remove the mandatory flag, and then process the attributes normally. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 244 +++++++++++++++++++------------------- include/linux/genl_magic_func.h | 107 ++++++++--------- include/linux/genl_magic_struct.h | 64 ++++------ 3 files changed, 193 insertions(+), 222 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 7b174a093a8d..4ceecb9307d9 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -86,7 +86,7 @@ */ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, /* "arbitrary" size strings, nla_policy.len = 0 */ - __str_field(1, GENLA_F_MANDATORY, info_text, 0) + __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) ) /* Configuration requests typically need a context to operate on. @@ -95,133 +95,133 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and/or the replication group (aka resource) name, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, - __u32_field(1, GENLA_F_MANDATORY, ctx_volume) - __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) + __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, - __str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, backing_dev, 128) - __str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev, 128) - __s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT, meta_dev_idx) + __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128) + __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128) + __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) /* use the resize command to try and change the disk_size */ - __u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT, disk_size) + __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) /* we could change the max_bio_bvecs, * but it won't propagate through the stack */ - __u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT, max_bio_bvecs) - - __u32_field_def(6, GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) - __u32_field_def(7, GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) - - __u32_field_def(8, GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) - __s32_field_def(9, GENLA_F_MANDATORY, resync_after, DRBD_RESYNC_AFTER_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) - - __flg_field_def(16, GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) - __flg_field_def(17, GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) - __flg_field_def(18, GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) - __flg_field_def(19, GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) + + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, - __str_field_def(1, GENLA_F_MANDATORY, cpu_mask, 32) - __u32_field_def(2, GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) + __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT, my_addr, 128) - __bin_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT, peer_addr, 128) - __str_field_def(3, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, my_addr, 128) + __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) + __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(5, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT, discard_my_data) - __flg_field_def(28, GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT, dry_run) - __flg_field_def(31, GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(25, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, - __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) + __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) ) GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, - __u64_field(1, GENLA_F_MANDATORY, resize_size) - __flg_field(2, GENLA_F_MANDATORY, resize_force) - __flg_field(3, GENLA_F_MANDATORY, no_resync) + __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) + __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) + __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) ) GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* the reason of the broadcast, * if this is an event triggered broadcast. */ - __u32_field(1, GENLA_F_MANDATORY, sib_reason) - __u32_field(2, GENLA_F_REQUIRED, current_state) - __u64_field(3, GENLA_F_MANDATORY, capacity) - __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(2, DRBD_F_REQUIRED, current_state) + __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) + __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) /* These are for broadcast from after state change work. * prev_state and new_state are from the moment the state change took * place, new_state is not neccessarily the same as current_state, * there may have been more state changes since. Which will be * broadcasted soon, in their respective after state change work. */ - __u32_field(5, GENLA_F_MANDATORY, prev_state) - __u32_field(6, GENLA_F_MANDATORY, new_state) + __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) + __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) /* if we have a local disk: */ - __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) - __u32_field(8, GENLA_F_MANDATORY, disk_flags) - __u64_field(9, GENLA_F_MANDATORY, bits_total) - __u64_field(10, GENLA_F_MANDATORY, bits_oos) + __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) + __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) + __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) /* and in case resync or online verify is active */ - __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) - __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) /* for pre and post notifications of helper execution */ - __str_field(13, GENLA_F_MANDATORY, helper, 32) - __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) + __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) + __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, - __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) + __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, - __flg_field(1, GENLA_F_MANDATORY, clear_bm) + __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) ) GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, - __u32_field(1, GENLA_F_REQUIRED, timeout_type) + __u32_field(1, DRBD_F_REQUIRED, timeout_type) ) GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, - __flg_field(1, GENLA_F_MANDATORY, force_disconnect) + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) /* @@ -232,11 +232,11 @@ GENL_mc_group(events) /* kernel -> userspace announcement of changes */ GENL_notification( DRBD_EVENT, 1, events, - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) - GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) ) /* query kernel for specific or all info */ @@ -250,116 +250,116 @@ GENL_op( ), /* To select the object .doit. * Or a subset of objects in .dumpit. */ - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) #if 0 /* TO BE DONE */ /* create or destroy resources, aka replication groups */ GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) #endif /* add DRBD minor devices as volumes to resources */ GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) /* add or delete replication links to resources */ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, GENL_doit(drbd_adm_resource_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_CONNECT, 10, GENL_doit(drbd_adm_connect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_CHG_NET_OPTS, 29, GENL_doit(drbd_adm_net_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_ATTACH, 12, GENL_doit(drbd_adm_attach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) ) GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, GENL_doit(drbd_adm_disk_opts), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_RESIZE, 13, GENL_doit(drbd_adm_resize), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_PRIMARY, 14, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_SECONDARY, 15, GENL_doit(drbd_adm_set_role), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) ) GENL_op( DRBD_ADM_NEW_C_UUID, 16, GENL_doit(drbd_adm_new_c_uuid), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) - GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op( DRBD_ADM_START_OV, 17, GENL_doit(drbd_adm_start_ov), - GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 2ae16126c6a4..58edd403a3ff 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -3,53 +3,6 @@ #include -/* - * Extension of genl attribute validation policies {{{1 - * {{{2 - */ - -/** - * nla_is_required - return true if this attribute is required - * @nla: netlink attribute - */ -static inline int nla_is_required(const struct nlattr *nla) -{ - return nla->nla_type & GENLA_F_REQUIRED; -} - -/** - * nla_is_mandatory - return true if understanding this attribute is mandatory - * @nla: netlink attribute - * Note: REQUIRED attributes are implicitly MANDATORY as well - */ -static inline int nla_is_mandatory(const struct nlattr *nla) -{ - return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); -} - -/* Functionality to be integrated into nla_parse(), and validate_nla(), - * respectively. - * - * Enforcing the "mandatory" bit is done here, - * by rejecting unknown mandatory attributes. - * - * Part of enforcing the "required" flag would mean to embed it into - * nla_policy.type, and extending validate_nla(), which currently does - * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, - * so we cannot do that. Thats why enforcing "required" is done in the - * generated assignment functions below. */ -static int nla_check_unknown(int maxtype, struct nlattr *head, int len) -{ - struct nlattr *nla; - int rem; - nla_for_each_attr(nla, head, len, rem) { - __u16 type = nla_type(nla); - if (type > maxtype && nla_is_mandatory(nla)) - return -EOPNOTSUPP; - } - return 0; -} - /* * Magic: declare tla policy {{{1 * Magic: declare nested policies @@ -80,13 +33,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type }, + [attr_nr] = { .type = nla_type }, #undef __array #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ __get, __put, __is_signed) \ - [__nla_type(attr_nr)] = { .type = nla_type, \ - .len = maxlen - (nla_type == NLA_NUL_STRING) }, + [attr_nr] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, #include GENL_MAGIC_INCLUDE_FILE @@ -189,6 +142,43 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif +static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. + */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + } + } + return 0; +} + +static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ @@ -204,12 +194,9 @@ static int __ ## s_name ## _from_attrs(struct s_name *s, \ if (!tla) \ return -ENOMSG; \ DPRINT_TLA(#s_name, "<=-", #tag_name); \ - err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ if (err) \ return err; \ - err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ - if (err) \ - return err; \ \ s_fields \ return 0; \ @@ -226,17 +213,17 @@ static int s_name ## _from_attrs_for_change(struct s_name *s, \ } __attribute__((unused)) \ #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \ - nla = ntb[__nla_type(attr_nr)]; \ + nla = ntb[attr_nr]; \ if (nla) { \ - if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ pr_info("<< must not change invariant attr: %s\n", #name); \ return -EEXIST; \ } \ assignment; \ - } else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) { \ + } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ /* attribute missing from payload, */ \ /* which was expected */ \ - } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + } else if ((attr_flag) & DRBD_F_REQUIRED) { \ pr_info("<< missing attr: %s\n", #name); \ return -ENOMSG; \ } @@ -415,7 +402,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ __put(skb, attr_nr, s->name); \ } @@ -423,7 +410,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ __get, __put, __is_signed) \ - if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ __put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index f3c3425ac30f..1d0bd79e27b3 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -26,50 +26,34 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); * Extension of genl attribute validation policies {{{2 */ -/** - * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement - * - * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. - * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. +/* + * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not + * know about. This flag can be set in nlattr->nla_type to indicate that this + * attribute must not be ignored. * - * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. - * Without this, unknown attributes (> maxtype) are _silently_ ignored - * by validate_nla(). + * We check and remove this flag in drbd_nla_check_mandatory() before + * validating the attribute types and lengths via nla_parse_nested(). + */ +#define DRBD_GENLA_F_MANDATORY (1 << 14) + +/* + * Flags specific to drbd and not visible at the netlink layer, used in + * _from_attrs and _to_skb: * - * To be used for API extensions, so older kernel can reject requests for not - * yet implemented features, if newer userland tries to use them even though - * the genl_family version clearly indicates they are not available. + * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is + * invalid. * - * NOTE: These flags overload - * NLA_F_NESTED (1 << 15) - * NLA_F_NET_BYTEORDER (1 << 14) - * from linux/netlink.h, which are not useful for validate_nla(): - * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting - * .type = NLA_NESTED in the appropriate policy. + * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be + * included in unpriviledged get requests or broadcasts. * - * See also: nla_type() + * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but + * cannot subsequently be changed. */ -enum { - GENLA_F_MANDATORY = 1 << 14, - GENLA_F_REQUIRED = 1 << 15, - - /* Below will not be present in the __u16 .nla_type, but can be - * triggered on in _to_skb resp. _from_attrs */ - - /* To exclude "sensitive" information from broadcasts, or on - * unpriviledged get requests. This is useful because genetlink - * multicast groups can be listened in on by anyone. */ - GENLA_F_SENSITIVE = 1 << 16, - - /* INVARIAN options cannot be changed at runtime. - * Useful to share an attribute policy and struct definition, - * between some "create" and "change" commands, - * but disallow certain fields to be changed online. - */ - GENLA_F_INVARIANT = 1 << 17, -}; +#define DRBD_F_REQUIRED (1 << 0) +#define DRBD_F_SENSITIVE (1 << 1) +#define DRBD_F_INVARIANT (1 << 2) -#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) +#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) /* }}}1 * MAGIC @@ -170,12 +154,12 @@ enum { \ #undef __field #define __field(attr_nr, attr_flag, name, nla_type, type, \ __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #undef __array #define __array(attr_nr, attr_flag, name, nla_type, type, \ maxlen, __get, __put, __is_signed) \ - T_ ## name = (__u16)(attr_nr | attr_flag), + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), #include GENL_MAGIC_INCLUDE_FILE -- cgit v1.2.3 From 5084d71d89e1a94193378efb12ac659e4e6ada3f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 May 2011 14:08:58 +0200 Subject: drbd: drbd_nla_check_mandatory(): Need to remove the DRBD_GENLA_F_MANDATORY flag first We need to remove the flag before checking for valid types. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/genl_magic_func.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 58edd403a3ff..357f2ad403b1 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -158,9 +158,9 @@ static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) nla_for_each_attr(nla, head, len, rem) { if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; if (nla_type(nla) > maxtype) return -EOPNOTSUPP; - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; } } return 0; -- cgit v1.2.3 From 67b58bf723b083d4776cd7c9959246ef46c0d36f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 6 Jun 2011 15:36:04 +0200 Subject: drbd: spelling fix: too small It is not "to small", but "too small". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 ++++---- include/linux/drbd.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 59923db780b9..31d27dd92924 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1360,7 +1360,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) new_disk_conf->disk_size); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1374,7 +1374,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = ERR_MD_DISK_TO_SMALL; + retcode = ERR_MD_DISK_TOO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); @@ -1385,7 +1385,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * (we may currently be R_PRIMARY with no local disk...) */ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1447,7 +1447,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 679e81123229..fedda00374af 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -112,8 +112,8 @@ enum drbd_ret_code { ERR_OPEN_MD_DISK = 105, ERR_DISK_NOT_BDEV = 107, ERR_MD_NOT_BDEV = 108, - ERR_DISK_TO_SMALL = 111, - ERR_MD_DISK_TO_SMALL = 112, + ERR_DISK_TOO_SMALL = 111, + ERR_MD_DISK_TOO_SMALL = 112, ERR_BDCLAIM_DISK = 114, ERR_BDCLAIM_MD_DISK = 115, ERR_MD_IDX_INVALID = 116, -- cgit v1.2.3 From 789c1b626cb490acb36cf481b45040b324f60fde Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Jun 2011 16:16:44 +0200 Subject: drbd: Use the terminology suggested by the command names in the source code and messages Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 16 ++++++++-------- include/linux/drbd.h | 4 ++-- include/linux/drbd_genl.h | 17 ++++------------- 3 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 31d27dd92924..5b4090f52f5a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -47,8 +47,8 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info); -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); @@ -2972,7 +2972,7 @@ drbd_check_conn_name(const char *name) return NO_ERROR; } -int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -2989,7 +2989,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info) if (adm_ctx.tconn) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { retcode = ERR_INVALID_REQUEST; - drbd_msg_put_info("connection exists"); + drbd_msg_put_info("resource exists"); } /* else: still NO_ERROR */ goto out; @@ -3086,7 +3086,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) goto out; if (!adm_ctx.tconn) { - retcode = ERR_CONN_NOT_KNOWN; + retcode = ERR_RES_NOT_KNOWN; goto out; } @@ -3140,7 +3140,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { /* "can not happen" */ - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; drbd_msg_put_info("failed to delete connection"); } goto out; @@ -3149,7 +3149,7 @@ out: return 0; } -int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; @@ -3166,7 +3166,7 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) retcode = NO_ERROR; } else { - retcode = ERR_CONN_IN_USE; + retcode = ERR_RES_IN_USE; } if (retcode == NO_ERROR) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index fedda00374af..161cd414b036 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,8 +155,8 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, - ERR_CONN_NOT_KNOWN = 158, - ERR_CONN_IN_USE = 159, + ERR_RES_NOT_KNOWN = 158, + ERR_RES_IN_USE = 159, ERR_MINOR_CONFIGURED = 160, ERR_MINOR_EXISTS = 161, ERR_INVALID_REQUEST = 162, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4ceecb9307d9..47ef324b69db 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -253,25 +253,16 @@ GENL_op( GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) ) -#if 0 - /* TO BE DONE */ - /* create or destroy resources, aka replication groups */ -GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -#endif - /* add DRBD minor devices as volumes to resources */ -GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), +GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) - /* add or delete replication links to resources */ -GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + /* add or delete resources */ +GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) -GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), +GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, -- cgit v1.2.3 From 7c3063cc6f0e75cdf312f5f318f9a4c02e460397 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Jun 2011 17:52:12 +0200 Subject: drbd: Also need to check for DRBD_GENLA_F_MANDATORY flags before nla_find_nested() This is done by introducing drbd_nla_find_nested() which handles the flag before calling nla_find_nested(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +++ drivers/block/drbd/drbd_nl.c | 96 ++++++++++++++++++++++++++++++++--------- include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 37 ---------------- 4 files changed, 83 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24f..c58430183d5f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1407,6 +1407,12 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); +struct nla_policy; +extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla); +extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy); +extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); + /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b4090f52f5a..24187f1c93d5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -92,7 +92,7 @@ static struct drbd_config_context { #define VOLUME_UNSPECIFIED (-1U) /* pointer into the request skb, * limited lifetime! */ - char *conn_name; + char *resource_name; /* reply buffer */ struct sk_buff *reply_skb; @@ -191,15 +191,15 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; - nla = nested_attr_tb[__nla_type(T_ctx_conn_name)]; + nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) - adm_ctx.conn_name = nla_data(nla); + adm_ctx.resource_name = nla_data(nla); } else adm_ctx.volume = VOLUME_UNSPECIFIED; adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); - adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name); + adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name); if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { drbd_msg_put_info("unknown minor"); @@ -214,7 +214,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n", - adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name); + adm_ctx.minor, adm_ctx.resource_name, + adm_ctx.mdev->tconn->name); drbd_msg_put_info("minor exists in different connection"); return ERR_INVALID_REQUEST; } @@ -239,7 +240,7 @@ fail: static int drbd_adm_finish(struct genl_info *info, int retcode) { struct nlattr *nla; - const char *conn_name = NULL; + const char *resource_name = NULL; if (adm_ctx.tconn) { kref_put(&adm_ctx.tconn->kref, &conn_destroy); @@ -253,9 +254,10 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) nla = info->attrs[DRBD_NLA_CFG_CONTEXT]; if (nla) { - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); - if (nla) - conn_name = nla_data(nla); + int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (nla && !IS_ERR(nla)) + resource_name = nla_data(nla); } drbd_adm_send_reply(adm_ctx.reply_skb, info); @@ -2526,7 +2528,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); @@ -2534,7 +2536,7 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigne goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); nla_nest_end(skb, nla); return 0; @@ -2778,8 +2780,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; struct nlattr *nla; - const char *conn_name; + const char *resource_name; struct drbd_tconn *tconn; + int maxtype; /* Is this a followup call? */ if (cb->args[0]) { @@ -2799,12 +2802,15 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) /* No explicit context given. Dump all. */ if (!nla) goto dump; - nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name)); + maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (IS_ERR(nla)) + return PTR_ERR(nla); /* context given, but no name present? */ if (!nla) return -EINVAL; - conn_name = nla_data(nla); - tconn = conn_get_by_name(conn_name); + resource_name = nla_data(nla); + tconn = conn_get_by_name(resource_name); if (!tconn) return -ENODEV; @@ -2957,16 +2963,16 @@ out_nolock: } static enum drbd_ret_code -drbd_check_conn_name(const char *name) +drbd_check_resource_name(const char *name) { if (!name || !name[0]) { - drbd_msg_put_info("connection name missing"); + drbd_msg_put_info("resource name missing"); return ERR_MANDATORY_TAG; } /* if we want to use these in sysfs/configfs/debugfs some day, * we must not allow slashes */ if (strchr(name, '/')) { - drbd_msg_put_info("invalid connection name"); + drbd_msg_put_info("invalid resource name"); return ERR_INVALID_REQUEST; } return NO_ERROR; @@ -2982,7 +2988,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = drbd_check_conn_name(adm_ctx.conn_name); + retcode = drbd_check_resource_name(adm_ctx.resource_name); if (retcode != NO_ERROR) goto out; @@ -2995,7 +3001,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!conn_create(adm_ctx.conn_name)) + if (!conn_create(adm_ctx.resource_name)) retcode = ERR_NOMEM; out: drbd_adm_finish(info, retcode); @@ -3213,3 +3219,53 @@ failed: "Event seq:%u sib_reason:%u\n", err, seq, sib->sib_reason); } + +int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. + * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. + */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + } + } + return 0; +} + +int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + +struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) +{ + int err; + /* + * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and + * we don't know about that attribute, reject all the nested + * attributes. + */ + err = drbd_nla_check_mandatory(maxtype, nla); + if (err) + return ERR_PTR(err); + return nla_find_nested(nla, attrtype); +} diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 47ef324b69db..0c2102c05384 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -96,7 +96,7 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, * and the volume id within the resource. */ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) - __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_conn_name, 128) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 357f2ad403b1..0b8a88e2e83e 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -142,43 +142,6 @@ static struct nlattr *nested_attr_tb[128]; #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) #endif -static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) -{ - struct nlattr *head = nla_data(nla); - int len = nla_len(nla); - int rem; - - /* - * validate_nla (called from nla_parse_nested) ignores attributes - * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. - * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY - * flag set also, check and remove that flag before calling - * nla_parse_nested. - */ - - nla_for_each_attr(nla, head, len, rem) { - if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { - nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; - if (nla_type(nla) > maxtype) - return -EOPNOTSUPP; - } - } - return 0; -} - -static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - const struct nla_policy *policy) -{ - int err; - - err = drbd_nla_check_mandatory(maxtype, nla); - if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy); - - return err; -} - #undef GENL_struct #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ /* *_from_attrs functions are static, but potentially unused */ \ -- cgit v1.2.3 From 089c075d88ac9407b8d7c5c8fc4b21c0d940bd82 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 14 Jun 2011 18:28:09 +0200 Subject: drbd: Convert the generic netlink interface to accept connection endpoints Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ++ drivers/block/drbd/drbd_main.c | 21 +++++ drivers/block/drbd/drbd_nl.c | 158 ++++++++++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 21 +++-- drivers/block/drbd/drbd_state.c | 2 + include/linux/drbd_genl.h | 62 +++++++-------- 6 files changed, 164 insertions(+), 107 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c3019730a24f..6d6d1056d824 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -836,6 +836,11 @@ struct drbd_tconn { /* is a resource from the config file */ wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct res_opts res_opts; + struct sockaddr_storage my_addr; + int my_addr_len; + struct sockaddr_storage peer_addr; + int peer_addr_len; + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ int agreed_pro_version; /* actually used protocol version */ @@ -1377,6 +1382,8 @@ extern void drbd_minor_destroy(struct kref *kref); struct drbd_tconn *conn_create(const char *name); extern void conn_destroy(struct kref *kref); struct drbd_tconn *conn_get_by_name(const char *name); +extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len); extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 178c711bc4af..79f275dc43a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2420,6 +2420,27 @@ found: return tconn; } +struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len) +{ + struct drbd_tconn *tconn; + + rcu_read_lock(); + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { + if (tconn->my_addr_len == my_addr_len && + tconn->peer_addr_len == peer_addr_len && + !memcmp(&tconn->my_addr, my_addr, my_addr_len) && + !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) { + kref_get(&tconn->kref); + goto found; + } + } + tconn = NULL; +found: + rcu_read_unlock(); + return tconn; +} + static int drbd_alloc_socket(struct drbd_socket *socket) { socket->rbuf = (void *) __get_free_page(GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 352be132b4be..e7933e04e7b8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -94,6 +94,8 @@ static struct drbd_config_context { /* pointer into the request skb, * limited lifetime! */ char *resource_name; + struct nlattr *my_addr; + struct nlattr *peer_addr; /* reply buffer */ struct sk_buff *reply_skb; @@ -142,6 +144,7 @@ int drbd_msg_put_info(const char *info) */ #define DRBD_ADM_NEED_MINOR 1 #define DRBD_ADM_NEED_RESOURCE 2 +#define DRBD_ADM_NEED_CONNECTION 4 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, unsigned flags) { @@ -174,6 +177,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, adm_ctx.reply_dh->minor = d_in->minor; adm_ctx.reply_dh->ret_code = NO_ERROR; + adm_ctx.volume = VOLUME_UNSPECIFIED; if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { struct nlattr *nla; /* parse and validate only */ @@ -191,12 +195,21 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, /* and assign stuff to the global adm_ctx */ nla = nested_attr_tb[__nla_type(T_ctx_volume)]; - adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED; + if (nla) + adm_ctx.volume = nla_get_u32(nla); nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; if (nla) adm_ctx.resource_name = nla_data(nla); - } else - adm_ctx.volume = VOLUME_UNSPECIFIED; + adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; + adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; + if ((adm_ctx.my_addr && + nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) || + (adm_ctx.peer_addr && + nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) { + err = -EINVAL; + goto fail; + } + } adm_ctx.minor = d_in->minor; adm_ctx.mdev = minor_to_mdev(d_in->minor); @@ -211,6 +224,26 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, return ERR_INVALID_REQUEST; } + if (flags & DRBD_ADM_NEED_CONNECTION) { + if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("no resource name expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev) { + drbd_msg_put_info("no minor number expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.my_addr && adm_ctx.peer_addr) + adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr), + nla_len(adm_ctx.my_addr), + nla_data(adm_ctx.peer_addr), + nla_len(adm_ctx.peer_addr)); + if (!adm_ctx.tconn) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + } + /* some more paranoia, if the request was over-determined */ if (adm_ctx.mdev && adm_ctx.tconn && adm_ctx.mdev->tconn != adm_ctx.tconn) { @@ -268,30 +301,28 @@ static int drbd_adm_finish(struct genl_info *info, int retcode) static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) { char *afs; - struct net_conf *nc; - rcu_read_lock(); - nc = rcu_dereference(tconn->net_conf); - if (nc) { - switch (((struct sockaddr *)nc->peer_addr)->sa_family) { - case AF_INET6: - afs = "ipv6"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr); - break; - case AF_INET: - afs = "ipv4"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - break; - default: - afs = "ssocks"; - snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)nc->peer_addr)->sin_addr); - } - snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); + /* FIXME: A future version will not allow this case. */ + if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0) + return; + + switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); } - rcu_read_unlock(); + snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); } int drbd_khelper(struct drbd_conf *mdev, char *cmd) @@ -1874,7 +1905,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) int rsr; /* re-sync running */ struct crypto crypto = { }; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -1986,18 +2017,39 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) struct drbd_conf *mdev; struct net_conf *old_conf, *new_conf = NULL; struct crypto crypto = { }; - struct drbd_tconn *oconn; struct drbd_tconn *tconn; - struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; enum drbd_ret_code retcode; int i; int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) goto out; + if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { + drbd_msg_put_info("connection endpoint(s) missing"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + /* No need for _rcu here. All reconfiguration is + * strictly serialized on genl_lock(). We are protected against + * concurrent reconfiguration/addition/deletion */ + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len && + !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) { + retcode = ERR_LOCAL_ADDR; + goto out; + } + + if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len && + !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) { + retcode = ERR_PEER_ADDR; + goto out; + } + } tconn = adm_ctx.tconn; conn_reconfig_start(tconn); @@ -2027,37 +2079,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - retcode = NO_ERROR; - - new_my_addr = (struct sockaddr *)&new_conf->my_addr; - new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - - /* No need for _rcu here. All reconfiguration is - * strictly serialized on genl_lock(). We are protected against - * concurrent reconfiguration/addition/deletion */ - list_for_each_entry(oconn, &drbd_tconns, all_tconn) { - struct net_conf *nc; - if (oconn == tconn) - continue; - - rcu_read_lock(); - nc = rcu_dereference(oconn->net_conf); - if (nc) { - taken_addr = (struct sockaddr *)&nc->my_addr; - if (new_conf->my_addr_len == nc->my_addr_len && - !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) - retcode = ERR_LOCAL_ADDR; - - taken_addr = (struct sockaddr *)&nc->peer_addr; - if (new_conf->peer_addr_len == nc->peer_addr_len && - !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) - retcode = ERR_PEER_ADDR; - } - rcu_read_unlock(); - if (retcode != NO_ERROR) - goto fail; - } - retcode = alloc_crypto(&crypto, new_conf); if (retcode != NO_ERROR) goto fail; @@ -2083,6 +2104,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) tconn->csums_tfm = crypto.csums_tfm; tconn->verify_tfm = crypto.verify_tfm; + tconn->my_addr_len = nla_len(adm_ctx.my_addr); + memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len); + tconn->peer_addr_len = nla_len(adm_ctx.peer_addr); + memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len); + mutex_unlock(&tconn->conf_update); rcu_read_lock(); @@ -2170,7 +2196,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) enum drbd_ret_code retcode; int err; - retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); if (!adm_ctx.reply_skb) return retcode; if (retcode != NO_ERROR) @@ -2529,7 +2555,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr) +int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr) { struct nlattr *nla; nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); @@ -2537,7 +2563,11 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, uns goto nla_put_failure; if (vnr != VOLUME_UNSPECIFIED) NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name); + NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); + if (tconn->my_addr_len) + NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); + if (tconn->peer_addr_len) + NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); nla_nest_end(skb, nla); return 0; @@ -2574,7 +2604,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, /* We need to add connection name and volume number information still. * Minor number is in drbd_genlmsghdr. */ - if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr)) + if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr)) goto nla_put_failure; if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) @@ -2736,7 +2766,7 @@ next_tconn: /* this is a tconn without a single volume */ dh->minor = -1U; dh->ret_code = NO_ERROR; - if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED)) + if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) genlmsg_cancel(skb, dh); else genlmsg_end(skb, dh); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4ba097293278..ab1d36cb6214 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -626,23 +626,21 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = nc->connect_int; + rcu_read_unlock(); - my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6)); - memcpy(&src_in6, nc->my_addr, my_addr_len); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, &tconn->my_addr, my_addr_len); - if (((struct sockaddr *)nc->my_addr)->sa_family == AF_INET6) + if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6)); - memcpy(&peer_in6, nc->peer_addr, peer_addr_len); - - rcu_read_unlock(); + peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len); what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, @@ -714,15 +712,14 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) rcu_read_unlock(); return NULL; } - sndbuf_size = nc->sndbuf_size; rcvbuf_size = nc->rcvbuf_size; connect_int = nc->connect_int; - - my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6)); - memcpy(&my_addr, nc->my_addr, my_addr_len); rcu_read_unlock(); + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, &tconn->my_addr, my_addr_len); + what = "sock_create_kern"; err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index cd55f46d5c55..d978e4d98a15 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1418,6 +1418,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) mutex_lock(&tconn->conf_update); old_conf = tconn->net_conf; + tconn->my_addr_len = 0; + tconn->peer_addr_len = 0; rcu_assign_pointer(tconn->net_conf, NULL); conn_free_crypto(tconn); mutex_unlock(&tconn->conf_update); diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 0c2102c05384..b93db6c83882 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -97,6 +97,8 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) + __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) + __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) ) GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, @@ -134,38 +136,36 @@ GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, ) GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, - __bin_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, my_addr, 128) - __bin_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_addr, 128) - __str_field_def(3, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, shared_secret, SHARED_SECRET_MAX) - __str_field_def(4, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) - __str_field_def(5, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) - __str_field_def(6, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) - __str_field_def(7, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) - __u32_field_def(8, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) - __u32_field_def(9, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) - __u32_field_def(10, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) - __u32_field_def(11, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) - __u32_field_def(12, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) - __u32_field_def(13, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) - __u32_field_def(14, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) - __u32_field_def(15, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) - __u32_field_def(16, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) - __u32_field_def(17, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) - __u32_field_def(18, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) - __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) - __u32_field_def(21, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) - __u32_field_def(22, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) - __u32_field_def(23, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) - __u32_field_def(24, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) - __u32_field_def(25, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) - __flg_field_def(26, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) - __flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) - __flg_field_def(28, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) - __flg_field_def(29, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) - __flg_field_def(31, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, -- cgit v1.2.3 From 6dff2902208364d058746ee794da4d960f6eec6f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 28 Jun 2011 14:18:12 +0200 Subject: drbd: Rename --dry-run to --tentative drbdadm already has a --dry-run option, so this option cannot directly be passed through to drbdsetup. Rename the drbdsetup option to resolve this conflict. For backward compatibility, make --dry-run an alias of --tentative. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 6 +++--- include/linux/drbd_genl.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 933d4767c110..72b1dfa4b656 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -926,7 +926,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - if (nc->dry_run && tconn->agreed_pro_version < 92) { + if (nc->tentative && tconn->agreed_pro_version < 92) { rcu_read_unlock(); mutex_unlock(&sock->mutex); conn_err(tconn, "--dry-run is not supported by peer"); @@ -945,7 +945,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) cf = 0; if (nc->discard_my_data) cf |= CF_DISCARD_MY_DATA; - if (nc->dry_run) + if (nc->tentative) cf |= CF_DRY_RUN; p->conn_flags = cpu_to_be32(cf); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ab1d36cb6214..d55a3cb21c31 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2836,7 +2836,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; struct net_conf *nc; - int hg, rule_nr, rr_conflict, dry_run; + int hg, rule_nr, rr_conflict, tentative; mydisk = mdev->state.disk; if (mydisk == D_NEGOTIATING) @@ -2916,7 +2916,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol (hg < 0) ? "peer" : "this"); } rr_conflict = nc->rr_conflict; - dry_run = nc->dry_run; + tentative = nc->tentative; rcu_read_unlock(); if (hg == -100) { @@ -2949,7 +2949,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index b93db6c83882..e879a9324380 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -164,7 +164,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) - __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, dry_run) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) ) -- cgit v1.2.3 From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 20 Jun 2011 22:21:19 +0200 Subject: drbd: allow ping-timeout of up to 30 seconds Allow up to 300 centi-seconds to be configured for the "ping timeout". There may be setups where heavy congestion, huge buffers, and asymmetric bandwidth limitations may need a "huge" ping-timeout as work-around for "spurious connection loss" problems. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 3627f760966e..82db83410f08 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -62,7 +62,7 @@ /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 /* max number of write requests between write barriers */ -- cgit v1.2.3 From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 31 May 2011 13:07:24 +0200 Subject: drbd: Fixes from the 8.3 development branch * commit 'ae57a0a': drbd: Only print sanitize state's warnings, if the state change happens drbd: we should write meta data updates with FLUSH FUA drbd: fix limit define, we support 1 PiByte now drbd: fix log message argument order drbd: Typo in user-visible message. drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too. drbd: Allow keywords to be used in multiple config sections. drbd: fix typos in comments. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++---- drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++++++++----------- include/linux/drbd_limits.h | 2 +- 3 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index de42c7cf7caf..1d71b3a3586a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -372,11 +372,11 @@ struct p_connection_features { u32 protocol_max; /* should be more than enough for future enhancements - * for now, feature_flags and the reserverd array shall be zero. + * for now, feature_flags and the reserved array shall be zero. */ u32 _pad; - u64 reserverd[7]; + u64 reserved[7]; } __packed; struct p_barrier { @@ -914,7 +914,7 @@ struct drbd_conf { atomic_t ap_bio_cnt; /* Requests we need to complete */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ - atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t unacked_cnt; /* Need to send replies for */ atomic_t local_cnt; /* Waiting for local completion */ /* Interval tree of pending local requests */ @@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* disk state is stable as well. */ break; - /* no new io accepted during tansitional states */ + /* no new io accepted during transitional states */ case D_ATTACHING: case D_FAILED: case D_NEGOTIATING: @@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev) /* we wait here * as long as the device is suspended * until the bitmap is no longer on the fly during connection - * handshake as long as we would exeed the max_buffer limit. + * handshake as long as we would exceed the max_buffer limit. * * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c9d0348736d..2cf69b25f1e7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,6 +37,15 @@ struct after_state_chg_work { struct completion *done; }; +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; + static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort); + enum sanitize_state_warnings *warn); static inline bool is_susp(union drbd_state s) { @@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns) return rv; } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * to D_UNKNOWN. This rule and many more along those lines are in this function. */ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, - const char **warn_sync_abort) + enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + if (warn) + *warn = NO_WARNING; + fp = FP_DONT_CARE; if (get_ldev(mdev)) { rcu_read_lock(); @@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; + if (warn) + *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? + ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; ns.conn = C_CONNECTED; } @@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } @@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = disk_max; if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; ns.disk = disk_min; } if (ns.pdsk > pdsk_max) ns.pdsk = pdsk_max; if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; ns.pdsk = pdsk_min; } @@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, { union drbd_state os; enum drbd_state_rv rv = SS_SUCCESS; - const char *warn_sync_abort = NULL; + enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; os = drbd_read_state(mdev); - ns = sanitize_state(mdev, ns, &warn_sync_abort); + ns = sanitize_state(mdev, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + print_sanitize_warnings(mdev, ssw); drbd_pr_state_change(mdev, os, ns, flags); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 82db83410f08..f1046b13d9f6 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -126,7 +126,7 @@ * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ #define DRBD_DISK_SIZE_MIN 0 -#define DRBD_DISK_SIZE_MAX (16 * (2LLU << 30)) +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ #define DRBD_DISK_SIZE_SCALE 's' /* sectors */ -- cgit v1.2.3 From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Jul 2011 15:38:59 +0200 Subject: drbd: detach from frozen backing device * drbd-8.3: documentation: Documented detach's --force and disk's --disk-timeout drbd: Implemented the disk-timeout option drbd: Force flag for the detach operation drbd: Allow new IOs while the local disk in in FAILED state drbd: Bitmap IO functions can not return prematurely if the disk breaks drbd: Added a kref to bm_aio_ctx drbd: Hold a reference to ldev while doing meta-data IO drbd: Keep a reference to the bio until the completion handler finished drbd: Implemented wait_until_done_or_disk_failure() drbd: Replaced md_io_mutex by an atomic: md_io_in_use drbd: moved md_io into mdev drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk drbd: Keep a reference to barrier acked requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 75 ++++++++++++++++++------ drivers/block/drbd/drbd_bitmap.c | 115 +++++++++++++++++++++++++++---------- drivers/block/drbd/drbd_int.h | 12 ++-- drivers/block/drbd/drbd_main.c | 77 ++++++++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 28 ++++++++- drivers/block/drbd/drbd_receiver.c | 2 - drivers/block/drbd/drbd_req.c | 52 +++++++++++------ drivers/block/drbd/drbd_req.h | 19 +++--- drivers/block/drbd/drbd_state.c | 7 +++ drivers/block/drbd/drbd_worker.c | 9 ++- include/linux/drbd_genl.h | 9 ++- include/linux/drbd_limits.h | 6 ++ 12 files changed, 321 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index aeb483daea06..58b5b61628fc 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -114,18 +114,44 @@ struct drbd_atodb_wait { static int w_al_write_transaction(struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int err; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; @@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = md_io.error; + err = mdev->md_io.error; out: bio_put(bio); @@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + aw->err = -EIO; + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); @@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) mdev->al_tr_number++; } - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - b = page_address(mdev->md_io_page); + b = drbd_md_get_buffer(mdev); + if (!b) + return 0; /* Always use the full ringbuffer space for now. * possible optimization: read in all of it, @@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* IO error */ if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { if (found_initialized != mx) dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!expect(rv != 0)) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -643,7 +684,7 @@ cancel: mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 52c48143b22a..706e5220dd4a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if (atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_KERNEL); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must */ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = flags, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. */ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { + struct bm_aio_ctx *ctx; + int err; + + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { .mdev = mdev, .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), + .done = 0, .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, }; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); - return 0; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); - if (ctx.error) + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6035784f0de3..4e582058a7c9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -780,8 +780,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -852,6 +851,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -978,7 +978,8 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? */ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) @@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 448de7bf8223..15384986e4a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn) tconn->oldest_tle = b; tconn->newest_tle = b; INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } @@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); mdev = b->w.mdev; nob = b->next; @@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } -} + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. */ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); + } +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) spin_unlock_irq(&tconn->req_lock); } +/** + * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * @mdev: DRBD device. + * @what: The action/event to perform with all request objects + * + * @what might ony be ABORT_DISK_IO. + */ +void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + D_ASSERT(what == ABORT_DISK_IO); + + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + b = b->next; + } + + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + + spin_unlock_irq(&tconn->req_lock); +} + static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; @@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->own_state_mutex); mdev->state_mutex = &mdev->own_state_mutex; @@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref) struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 97d1dab045d2..bf8d0b077624 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); goto success; fail_unlock: @@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev) +static int adm_detach(struct drbd_conf *mdev, int force) { enum drbd_state_rv retcode; int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); /* D_FAILED will transition to DISKLESS. */ @@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; +out: return retcode; } @@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev) int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_detach(adm_ctx.mdev); + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; @@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev); + retcode = adm_detach(mdev, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7218750d2937..3a7e54b8f418 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c4e4553f5c2c..8fa51cda3b7e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->i.waiting) { /* Retry all conflicting peer requests. */ @@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case COMPLETED_OK: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->i.size >> 9; else mdev->read_cnt += req->i.size >> 9; @@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); put_ldev(mdev); + goto_queue_for_net_read: + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } rcu_read_unlock(); - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&tconn->req_lock); @@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, req->start_time + et); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 5135c95fbf85..f6aff150addb 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -106,6 +106,7 @@ enum drbd_req_event { READ_COMPLETED_WITH_ERROR, READ_AHEAD_COMPLETED_WITH_ERROR, WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, COMPLETED_OK, RESEND, FAIL_FROZEN_DISK_IO, @@ -119,18 +120,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. */ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -209,8 +213,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4c13a6f4f184..f51cefdbeff3 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -29,6 +29,9 @@ #include "drbd_int.h" #include "drbd_req.h" +/* in drbd_main.c */ +extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); + struct after_state_chg_work { struct drbd_work w; union drbd_state os; @@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_apply(mdev, ABORT_DISK_IO); + /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6410c55831e0..dac8d9bc4bec 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,11 +67,18 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + drbd_md_put_buffer(mdev); + put_ldev(mdev); } /* reads on behalf of the partner, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index e879a9324380..2e6cefefe5e5 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + /* * Notifications and commands (genlmsghdr->cmd) */ @@ -335,7 +340,9 @@ GENL_op( ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index f1046b13d9f6..ddd332db2a5d 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -50,6 +50,12 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 -- cgit v1.2.3 From d5d7ebd42250620a6da2a8f6943c024391433488 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 5 Jul 2011 20:59:26 +0200 Subject: drbd: on attach, enforce clean meta data Detection of unclean shutdown has moved into user space. The kernel code will, whenever it updates the meta data, mark it as "unclean", and will refuse to attach to such unclean meta data. "drbdadm up" now schedules "drbdmeta apply-al", which will apply the activity log to the bitmap, and/or reinitialize it, if necessary, as well as set a "clean" indicator flag. This moves a bit code out of kernel space. As a side effect, it also prevents some 8.3 module from accidentally ignoring the 8.4 style activity log, if someone should downgrade, whether on purpose, or accidentally because he changed kernel versions without providing an 8.4 for the new kernel, and the new kernel comes with in-tree 8.3. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 259 --------------------------------------- drivers/block/drbd/drbd_int.h | 6 - drivers/block/drbd/drbd_main.c | 26 ++-- drivers/block/drbd/drbd_nl.c | 19 +-- drivers/block/drbd/drbd_state.c | 1 + include/linux/drbd.h | 10 +- 6 files changed, 28 insertions(+), 293 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 58b5b61628fc..da8ffd54fc18 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -462,265 +462,6 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } -/* FIXME - * reading of the activity log, - * and potentially dirtying of the affected bitmap regions, - * should be done from userland only. - * DRBD would simply always attach with an empty activity log, - * and refuse to attach to something that looks like a crashed primary. - */ - -/** - * drbd_al_read_tr() - Read a single transaction from the on disk activity log - * @mdev: DRBD device. - * @bdev: Block device to read form. - * @b: pointer to an al_transaction. - * @index: On disk slot of the transaction to read. - * - * Returns -1 on IO error, 0 on checksum error and 1 upon success. - */ -static int drbd_al_read_tr(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev, - int index) -{ - struct al_transaction_on_disk *b = page_address(mdev->md_io_page); - sector_t sector; - u32 crc; - - sector = bdev->md.md_offset - + bdev->md.al_offset - + index * (MD_BLOCK_SIZE>>9); - - /* Dont process error normally, - * as this is done before disk is attached! */ - if (drbd_md_sync_page_io(mdev, bdev, sector, READ)) - return -1; - - if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) - return 0; - - if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) - return 0; - - if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) - return 0; - - if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) - return 0; - - crc = be32_to_cpu(b->crc32c); - b->crc32c = 0; - if (!expect(crc == crc32c(0, b, 4096))) - return 0; - - return 1; -} - -/** - * drbd_al_read_log() - Restores the activity log from its on disk representation. - * @mdev: DRBD device. - * @bdev: Block device to read form. - * - * Returns 1 on success, returns 0 when reading the log failed due to IO errors. - */ -int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) -{ - struct al_transaction_on_disk *b; - int i; - int rv; - int mx; - int active_extents = 0; - int transactions = 0; - int found_valid = 0; - int found_initialized = 0; - int from = 0; - int to = 0; - u32 from_tnr = 0; - u32 to_tnr = 0; - u32 cnr; - - /* Note that this is expected to be called with a newly created, - * clean and all unused activity log of the "expected size". - */ - - /* lock out all other meta data io for now, - * and make sure the page is mapped. - */ - b = drbd_md_get_buffer(mdev); - if (!b) - return 0; - - /* Always use the full ringbuffer space for now. - * possible optimization: read in all of it, - * then scan the in-memory pages. */ - - mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* Find the valid transaction in the log */ - for (i = 0; i < mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, i); - /* invalid data in that block */ - if (rv == 0) - continue; - if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) { - ++found_initialized; - continue; - } - - /* IO error */ - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - cnr = be32_to_cpu(b->tr_number); - if (++found_valid == 1) { - from = i; - to = i; - from_tnr = cnr; - to_tnr = cnr; - continue; - } - - D_ASSERT(cnr != to_tnr); - D_ASSERT(cnr != from_tnr); - if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx); - from = i; - from_tnr = cnr; - } - if ((int)cnr - (int)to_tnr > 0) { - D_ASSERT(cnr - to_tnr == i - to); - to = i; - to_tnr = cnr; - } - } - - if (!found_valid) { - if (found_initialized != mx) - dev_warn(DEV, "No usable activity log found.\n"); - drbd_md_put_buffer(mdev); - return 1; - } - - /* Read the valid transactions. - * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ - i = from; - while (1) { - struct lc_element *e; - unsigned j, n, slot, extent_nr; - - rv = drbd_al_read_tr(mdev, bdev, i); - if (!expect(rv != 0)) - goto cancel; - if (rv == -1) { - drbd_md_put_buffer(mdev); - return 0; - } - - /* deal with different transaction types. - * not yet implemented */ - if (!expect(b->transaction_type == 0)) - goto cancel; - - /* on the fly re-create/resize activity log? - * will be a special transaction type flag. */ - if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) - goto cancel; - if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) - goto cancel; - - /* We are the only user of the activity log right now, - * don't actually need to take that lock. */ - spin_lock_irq(&mdev->al_lock); - - /* first, apply the context, ... */ - for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); - j < AL_CONTEXT_PER_TRANSACTION && - slot < mdev->act_log->nr_elements; j++, slot++) { - extent_nr = be32_to_cpu(b->context[j]); - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - - /* ... then apply the updates, - * which override the context information. - * drbd_al_read_tr already did the rangecheck - * on n <= AL_UPDATES_PER_TRANSACTION */ - n = be16_to_cpu(b->n_updates); - for (j = 0; j < n; j++) { - slot = be16_to_cpu(b->update_slot_nr[j]); - extent_nr = be32_to_cpu(b->update_extent_nr[j]); - if (!expect(slot < mdev->act_log->nr_elements)) - break; - e = lc_element_by_index(mdev->act_log, slot); - if (e->lc_number != extent_nr) { - if (extent_nr != LC_FREE) - active_extents++; - else - active_extents--; - } - lc_set(mdev->act_log, extent_nr, slot); - } - spin_unlock_irq(&mdev->al_lock); - - transactions++; - -cancel: - if (i == to) - break; - i++; - if (i >= mx) - i = 0; - } - - mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); - - /* ok, we are done with it */ - drbd_md_put_buffer(mdev); - - dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", - transactions, active_extents); - - return 1; -} - -/** - * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents - * @mdev: DRBD device. - */ -void drbd_al_apply_to_bm(struct drbd_conf *mdev) -{ - unsigned int enr; - unsigned long add = 0; - char ppb[10]; - int i, tmp; - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - tmp = drbd_bm_ALe_set_all(mdev, enr); - dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr); - add += tmp; - } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", - ppsize(ppb, Bit2KB(add))); -} - static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) { int rv; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e582058a7c9..9d0d6d0fb820 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -164,10 +164,6 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { /* usual integer division */ #define div_floor(A, B) ((A)/(B)) -/* drbd_meta-data.c (still in drbd_main.c) */ -/* 4th incarnation of the disk layout. */ -#define DRBD_MD_MAGIC (DRBD_MAGIC+4) - extern struct ratelimit_state drbd_ratelimit_state; extern struct idr minors; /* RCU, updates: genl_lock() */ extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ @@ -1560,7 +1556,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev); extern int drbd_rs_del_all(struct drbd_conf *mdev); extern void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size); -extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); @@ -1570,7 +1565,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) -extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); /* drbd_nl.c */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 15384986e4a4..f1d696ab6e83 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2932,7 +2932,7 @@ void drbd_md_sync(struct drbd_conf *mdev) for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->ldev->md.flags); - buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); @@ -2967,11 +2967,12 @@ out: * @bdev: Device from which the meta data should be read in. * * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case - * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. + * something goes wrong. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; + u32 magic, flags; int i, rv = NO_ERROR; if (!get_ldev_if_state(mdev, D_ATTACHING)) @@ -2989,8 +2990,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) { - dev_err(DEV, "Error while reading metadata, magic not found.\n"); + magic = be32_to_cpu(buffer->magic); + flags = be32_to_cpu(buffer->flags); + if (magic == DRBD_MD_MAGIC_84_UNCLEAN || + (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { + /* btw: that's Activity Log clean, not "all" clean. */ + dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); + rv = ERR_MD_UNCLEAN; + goto err; + } + if (magic != DRBD_MD_MAGIC_08) { + if (magic == DRBD_MD_MAGIC_07) + dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); + else + dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); rv = ERR_MD_INVALID; goto err; } @@ -3035,11 +3048,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } spin_unlock_irq(&mdev->tconn->req_lock); - /* This blocks wants to be get removed... */ - bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents); - if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) - bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; - err: drbd_md_put_buffer(mdev); out: diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index bf8d0b077624..b39f5dc0f47b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1267,7 +1267,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) union drbd_state ns, os; enum drbd_state_rv rv; struct net_conf *nc; - int cp_discovered = 0; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1477,11 +1476,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto force_diskless_dec; } - if (!drbd_al_read_log(mdev, nbc)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are supported. */ if (new_disk_conf->md_flushes) @@ -1511,10 +1505,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) { + !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) set_bit(CRASHED_PRIMARY, &mdev->flags); - cp_discovered = 1; - } mdev->send_cnt = 0; mdev->recv_cnt = 0; @@ -1566,15 +1558,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (cp_discovered) { - drbd_al_apply_to_bm(mdev); - if (drbd_bitmap_io(mdev, &drbd_bm_write, - "crashed primary apply AL", BM_LOCKED_MASK)) { - retcode = ERR_IO_MD_DISK; - goto force_diskless_dec; - } - } - if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) drbd_suspend_al(mdev); /* IO is still suspended here... */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f51cefdbeff3..c4d0d96d7906 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1017,6 +1017,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + mdf &= ~MDF_AL_CLEAN; if (test_bit(CRASHED_PRIMARY, &mdev->flags)) mdf |= MDF_CRASHED_PRIMARY; if (mdev->state.role == R_PRIMARY || diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 161cd414b036..1e9f754b66ac 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -162,6 +162,7 @@ enum drbd_ret_code { ERR_INVALID_REQUEST = 162, ERR_NEED_APV_100 = 163, ERR_NEED_ALLOW_TWO_PRI = 164, + ERR_MD_UNCLEAN = 165, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -321,7 +322,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_FULL_SYNC (1 << 3) #define MDF_WAS_UP_TO_DATE (1 << 4) #define MDF_PEER_OUT_DATED (1 << 5) -#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_AL_CLEAN (1 << 7) enum drbd_uuid_index { UI_CURRENT, @@ -341,10 +343,16 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) +/* magic numbers used in meta data and network packets */ #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a #define DRBD_MAGIC_100 0x8620ec20 +#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3) +#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4) +#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5) + + /* how I came up with this magic? * base64 decode "actlog==" ;) */ #define DRBD_AL_MAGIC 0x69cb65a2 -- cgit v1.2.3 From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Jul 2011 10:24:51 +0200 Subject: drbd: Changed some defaults * Enabled the resync controller, with a fill target of 50Kib. That gives reasonable resync speeds without tuning. A much better default than the 250KiB/s fixed. * Enable bitmap compression. It is save to use, and most people have more CPU power than network bandwidth. * ko-count of 7: Abort a connection if the peer fails to process a write request within 42 seconds. * al-extents of 1237: ~5 GiB seems to be a much more sane default these days. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index ddd332db2a5d..defdebfecb72 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 256 +#define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_VOLUME_MAX 65535 @@ -99,7 +99,7 @@ * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 /* } */ /* syncer { */ @@ -117,7 +117,7 @@ * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 -#define DRBD_AL_EXTENTS_DEF 127 +#define DRBD_AL_EXTENTS_DEF 1237 #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -151,7 +151,7 @@ #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ +#define DRBD_C_PLAN_AHEAD_DEF 20 #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 @@ -159,7 +159,7 @@ #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ #define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ #define DRBD_C_MAX_RATE_MAX (4 << 20) @@ -167,7 +167,7 @@ #define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ #define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_C_MIN_RATE_DEF 250 #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ @@ -187,6 +187,6 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -#define DRBD_USE_RLE_DEF 0 +#define DRBD_USE_RLE_DEF 1 #endif -- cgit v1.2.3 From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 May 2011 18:26:20 +0200 Subject: drbd: Define scale factors in a single place Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index defdebfecb72..cd3565cfed44 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -18,29 +18,35 @@ #define DRBD_MINOR_COUNT_MIN 1 #define DRBD_MINOR_COUNT_MAX (1U << 20) #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' #define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ @@ -49,6 +55,7 @@ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ @@ -60,46 +67,55 @@ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 #define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ @@ -118,6 +134,7 @@ #define DRBD_AL_EXTENTS_MIN 7 #define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 #define DRBD_MINOR_NUMBER_MAX (1<<30) @@ -148,34 +165,42 @@ #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 #define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 #define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MIN 250 #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MIN 0 #define DRBD_C_MIN_RATE_MAX (4 << 20) #define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ #define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE #define DRBD_PROTOCOL_DEF DRBD_PROT_C -- cgit v1.2.3 From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 13 Jul 2011 13:40:30 +0200 Subject: drbd: Fix the maximum accepted minor device number The maximum minor device number allowed by the kernel is (1<<20 - 1). Reject device numbers higher than that to earlier catch possible errors. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index cd3565cfed44..7d956e91ae7b 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -137,7 +137,7 @@ #define DRBD_AL_EXTENTS_SCALE '1' #define DRBD_MINOR_NUMBER_MIN -1 -#define DRBD_MINOR_NUMBER_MAX (1<<30) +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) #define DRBD_MINOR_NUMBER_DEF -1 #define DRBD_MINOR_NUMBER_SCALE '1' -- cgit v1.2.3 From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 18 Jul 2011 11:09:17 +0200 Subject: drbd: The minor_count module parameter is only a hint nowadays * The max of minor_count is 255 * In drbdadm count the number of minors, instead of finding the highest minor number * No longer us the magic in the init script Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7d956e91ae7b..6d0a24331ed2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,7 +16,7 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX (1U << 20) +#define DRBD_MINOR_COUNT_MAX 255 #define DRBD_MINOR_COUNT_DEF 32 #define DRBD_MINOR_COUNT_SCALE '1' -- cgit v1.2.3 From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Nov 2011 12:31:20 +0100 Subject: drbd: Load balancing of read requests New config option for the disk secition "read-balancing", with the values: prefer-local, prefer-remote, round-robin, when-congested-remote. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 57 +++++++++++++++++++++++++++++++++++++- include/linux/drbd.h | 8 ++++++ include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 1 + 6 files changed, 68 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d397681fb7aa..e2cccb40f5af 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -698,6 +698,7 @@ enum { AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ B_RS_H_DONE, /* Before resync handler done (already executed) */ DISCARD_MY_DATA, /* discard_my_data flag per volume */ + READ_BALANCE_RR, }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e546dd3fab8a..733b8bd663d5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ceb04a94aace..98251e2a7fb7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + /* if it is still queued, we may not complete it here. * it will be canceled soon. */ if (!(req->rq_state & RQ_NET_QUEUED)) @@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; + + if (!(req->rq_state & RQ_WRITE) && + mdev->state.disk == D_UP_TO_DATE && + !IS_ERR_OR_NULL(req->private_bio)) + goto goto_read_retry_local; + _req_may_be_done_not_susp(req, m); /* else: done by HANDED_OVER_TO_NETWORK */ break; + goto_read_retry_local: + req->rq_state |= RQ_LOCAL_PENDING; + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + generic_make_request(req->private_bio); + break; + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + if (!IS_ERR_OR_NULL(req->private_bio)) { + bio_put(req->private_bio); + req->private_bio = NULL; + put_ldev(mdev); + } _req_may_be_done_not_susp(req, m); break; }; @@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } +static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +{ + enum drbd_read_balancing rbm; + struct backing_dev_info *bdi; + + if (mdev->state.pdsk < D_UP_TO_DATE) + return false; + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + switch (rbm) { + case RB_CONGESTED_REMOTE: + bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + return bdi_read_congested(bdi); + case RB_LEAST_PENDING: + return atomic_read(&mdev->local_cnt) > + atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_ROUND_ROBIN: + return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + case RB_PREFER_REMOTE: + return true; + case RB_PREFER_LOCAL: + default: + return false; + } +} + /* * complete_conflicting_writes - wait for any conflicting write requests * @@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); + } else if (remote_due_to_read_balancing(mdev)) { + /* Keep the private bio in case we need it + for a local retry */ + local = 0; } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -1017,7 +1072,7 @@ fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, &req->i); fail_and_free_req: - if (local) { + if (!IS_ERR_OR_NULL(req->private_bio)) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e9f754b66ac..157ba3d74dc7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -102,6 +102,14 @@ enum drbd_on_congestion { OC_DISCONNECT, }; +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_code { ERR_CODE_BASE = 100, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2e6cefefe5e5..826008f297fe 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 6d0a24331ed2..17ef66a5c114 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -161,6 +161,7 @@ #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 -- cgit v1.2.3 From d60de03a6694302b691bdf858ede9cbdfb7112d6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Nov 2011 10:12:31 +0100 Subject: drbd: Load balancing method: striping Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 13 +++++++++++-- include/linux/drbd.h | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 98251e2a7fb7..5b28de0c5960 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -745,10 +745,11 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; } -static bool remote_due_to_read_balancing(struct drbd_conf *mdev) +static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector) { enum drbd_read_balancing rbm; struct backing_dev_info *bdi; + int stripe_shift; if (mdev->state.pdsk < D_UP_TO_DATE) return false; @@ -764,6 +765,14 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev) case RB_LEAST_PENDING: return atomic_read(&mdev->local_cnt) > atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_32K_STRIPING: /* stripe_shift = 15 */ + case RB_64K_STRIPING: + case RB_128K_STRIPING: + case RB_256K_STRIPING: + case RB_512K_STRIPING: + case RB_1M_STRIPING: /* stripe_shift = 20 */ + stripe_shift = (rbm - RB_32K_STRIPING + 15); + return (sector >> (stripe_shift - 9)) & 1; case RB_ROUND_ROBIN: return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); case RB_PREFER_REMOTE: @@ -841,7 +850,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); - } else if (remote_due_to_read_balancing(mdev)) { + } else if (remote_due_to_read_balancing(mdev, sector)) { /* Keep the private bio in case we need it for a local retry */ local = 0; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 157ba3d74dc7..1e86156c10f7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -108,6 +108,12 @@ enum drbd_read_balancing { RB_ROUND_ROBIN, RB_LEAST_PENDING, RB_CONGESTED_REMOTE, + RB_32K_STRIPING, + RB_64K_STRIPING, + RB_128K_STRIPING, + RB_256K_STRIPING, + RB_512K_STRIPING, + RB_1M_STRIPING, }; /* KEEP the order, do not delete or insert. Only append. */ -- cgit v1.2.3 From 26ec92871be1e6bd48d0be9ab38ee1ebbeea49f1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 Jul 2012 20:36:03 +0200 Subject: drbd: Stop using NLA_PUT*(). These macros no longer exist in kernel version v3.5-rc1. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 54 +++++++++++++++++++++++---------------- include/linux/genl_magic_func.h | 8 +++--- include/linux/genl_magic_struct.h | 16 ++++++------ 3 files changed, 45 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index cbd45de533cb..dc5bd6bbb280 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2554,13 +2554,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; - if (vnr != VOLUME_UNSPECIFIED) - NLA_PUT_U32(skb, T_ctx_volume, vnr); - NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); - if (tconn->my_addr_len) - NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); - if (tconn->peer_addr_len) - NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); + if (vnr != VOLUME_UNSPECIFIED && + nla_put_u32(skb, T_ctx_volume, vnr)) + goto nla_put_failure; + if (nla_put_string(skb, T_ctx_resource_name, tconn->name)) + goto nla_put_failure; + if (tconn->my_addr_len && + nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr)) + goto nla_put_failure; + if (tconn->peer_addr_len && + nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr)) + goto nla_put_failure; nla_nest_end(skb, nla); return 0; @@ -2618,20 +2622,23 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; - NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY); - NLA_PUT_U32(skb, T_current_state, mdev->state.i); - NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); - NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || + nla_put_u32(skb, T_current_state, mdev->state.i) || + nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + goto nla_put_failure; if (got_ldev) { - NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); - NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); - NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); - NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) || + nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) || + nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) || + nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev))) + goto nla_put_failure; if (C_SYNC_SOURCE <= mdev->state.conn && C_PAUSED_SYNC_T >= mdev->state.conn) { - NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); - NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) || + nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed)) + goto nla_put_failure; } } @@ -2641,15 +2648,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, case SIB_GET_STATUS_REPLY: break; case SIB_STATE_CHANGE: - NLA_PUT_U32(skb, T_prev_state, sib->os.i); - NLA_PUT_U32(skb, T_new_state, sib->ns.i); + if (nla_put_u32(skb, T_prev_state, sib->os.i) || + nla_put_u32(skb, T_new_state, sib->ns.i)) + goto nla_put_failure; break; case SIB_HELPER_POST: - NLA_PUT_U32(skb, - T_helper_exit_code, sib->helper_exit_code); + if (nla_put_u32(skb, T_helper_exit_code, + sib->helper_exit_code)) + goto nla_put_failure; /* fall through */ case SIB_HELPER_PRE: - NLA_PUT_STRING(skb, T_helper, sib->helper_name); + if (nla_put_string(skb, T_helper, sib->helper_name)) + goto nla_put_failure; break; } } diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 0b8a88e2e83e..023bc346b877 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -367,7 +367,8 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_FIELD(">>", nla_type, name, s, NULL); \ - __put(skb, attr_nr, s->name); \ + if (__put(skb, attr_nr, s->name)) \ + goto nla_put_failure; \ } #undef __array @@ -375,9 +376,10 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ __get, __put, __is_signed) \ if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ - __put(skb, attr_nr, min_t(int, maxlen, \ + if (__put(skb, attr_nr, min_t(int, maxlen, \ s->name ## _len + (nla_type == NLA_NUL_STRING)),\ - s->name); \ + s->name)) \ + goto nla_put_failure; \ } #include GENL_MAGIC_INCLUDE_FILE diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 1d0bd79e27b3..eecd19b37001 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -65,28 +65,28 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u8_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ - nla_get_u8, NLA_PUT_U8, false) + nla_get_u8, nla_put_u8, false) #define __u16_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ - nla_get_u16, NLA_PUT_U16, false) + nla_get_u16, nla_put_u16, false) #define __u32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ - nla_get_u32, NLA_PUT_U32, false) + nla_get_u32, nla_put_u32, false) #define __s32_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ - nla_get_u32, NLA_PUT_U32, true) + nla_get_u32, nla_put_u32, true) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, NLA_PUT_U64, false) + nla_get_u64, nla_put_u64, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, NLA_PUT, false) + nla_strlcpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ - nla_memcpy, NLA_PUT, false) + nla_memcpy, nla_put, false) /* fields with default values */ #define __flg_field_def(attr_nr, attr_flag, name, default) \ -- cgit v1.2.3 From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 20 Feb 2012 21:53:28 +0100 Subject: drbd: New disk option al-updates By disabling al-updates one might increase performace. The price for that is that in case a crashed primary (that had al-updates disabled) is reintegraded, it will receive a full-resync instead of a bitmap based resync. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 12 ++++++++++-- drivers/block/drbd/drbd_nl.c | 17 +++++++++++++++-- include/linux/drbd.h | 1 + include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9eae28944312..83d48d210b69 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) /* Double check: it may have been committed by someone else, * while we have been waiting for the lock. */ if (mdev->act_log->pending_changes) { - al_write_transaction(mdev); - mdev->al_writ_cnt++; + bool write_al_updates; + + rcu_read_lock(); + write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates; + rcu_read_unlock(); + + if (write_al_updates) { + al_write_transaction(mdev); + mdev->al_writ_cnt++; + } spin_lock_irq(&mdev->al_lock); /* FIXME diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index dc5bd6bbb280..c5d4fac1a111 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&mdev->tconn->conf_update); + if (new_disk_conf->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); drbd_md_sync(mdev); @@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } else if (dd == grew) set_bit(RESYNC_AFTER_NEG, &mdev->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) || + (test_bit(CRASHED_PRIMARY, &mdev->flags) && + drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, @@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (ns.disk == D_CONSISTENT && (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; - rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before this point, because drbd_request_state() modifies these flags. */ + if (rcu_dereference(mdev->ldev->disk_conf)->al_updates) + mdev->ldev->md.flags &= MDF_AL_DISABLED; + else + mdev->ldev->md.flags |= MDF_AL_DISABLED; + + rcu_read_unlock(); + /* In case we are C_CONNECTED postpone any decision on the new disk state after the negotiation phase. */ if (mdev->state.conn == C_CONNECTED) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e86156c10f7..36ae7dd28d90 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_PEER_OUT_DATED (1 << 5) #define MDF_CRASHED_PRIMARY (1 << 6) #define MDF_AL_CLEAN (1 << 7) +#define MDF_AL_DISABLED (1 << 8) enum drbd_uuid_index { UI_CURRENT, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 826008f297fe..92ec4b50a885 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ + __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17ef66a5c114..1fa19c5f5e64 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -210,6 +210,7 @@ #define DRBD_DISK_DRAIN_DEF 1 #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 +#define DRBD_AL_UPDATES_DEF 1 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -- cgit v1.2.3 From 37be2f59d3149b95afaeeeff94edde2c07f165d2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 8 Nov 2012 11:22:46 -0500 Subject: ext4: remove ext4_handle_release_buffer() ext4_handle_release_buffer() was intended to remove journal write access from a buffer, but it doesn't actually do anything at all other than add a BUFFER_TRACE point, but it's not reliably used for that either. Remove all the associated dead code. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Reviewed-by: Carlos Maiolino --- fs/ext4/ext4_jbd2.h | 7 ------- fs/ext4/resize.c | 17 +++-------------- fs/ext4/xattr.c | 1 - fs/jbd2/journal.c | 1 - fs/jbd2/transaction.c | 11 ----------- include/linux/jbd2.h | 1 - 6 files changed, 3 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 56d258c18303..7177f9b21cb2 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -254,13 +254,6 @@ static inline void ext4_handle_sync(handle_t *handle) handle->h_sync = 1; } -static inline void ext4_handle_release_buffer(handle_t *handle, - struct buffer_head *bh) -{ - if (ext4_handle_valid(handle)) - jbd2_journal_release_buffer(handle, bh); -} - static inline int ext4_handle_is_aborted(handle_t *handle) { if (ext4_handle_valid(handle)) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 47bf06a2765d..d99387b89edd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -783,7 +783,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) - goto exit_sbh; + goto exit_dind; err = ext4_journal_get_write_access(handle, dind); if (unlikely(err)) @@ -792,7 +792,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); if (unlikely(err)) - goto exit_dindj; + goto exit_dind; n_group_desc = ext4_kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), @@ -846,12 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, exit_inode: ext4_kvfree(n_group_desc); - /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); -exit_dindj: - /* ext4_handle_release_buffer(handle, dind); */ -exit_sbh: - /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ exit_dind: brelse(dind); exit_bh: @@ -969,14 +964,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } for (i = 0; i < reserved_gdb; i++) { - if ((err = ext4_journal_get_write_access(handle, primary[i]))) { - /* - int j; - for (j = 0; j < i; j++) - ext4_handle_release_buffer(handle, primary[j]); - */ + if ((err = ext4_journal_get_write_access(handle, primary[i]))) goto exit_bh; - } } if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2cdb98d62980..b1adda1b750d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -794,7 +794,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, int offset = (char *)s->here - bs->bh->b_data; unlock_buffer(bs->bh); - ext4_handle_release_buffer(handle, bs->bh); if (ce) { mb_cache_entry_release(ce); ce = NULL; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 484b8d1c6cb6..dbf41f9452db 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(jbd2_journal_get_create_access); EXPORT_SYMBOL(jbd2_journal_get_undo_access); EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); -EXPORT_SYMBOL(jbd2_journal_release_buffer); EXPORT_SYMBOL(jbd2_journal_forget); #if 0 EXPORT_SYMBOL(journal_sync_buffer); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a74ba4659549..deffd945c8e2 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1207,17 +1207,6 @@ out: return ret; } -/* - * jbd2_journal_release_buffer: undo a get_write_access without any buffer - * updates, if the update decided in the end that it didn't need access. - * - */ -void -jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - BUFFER_TRACE(bh, "entry"); -} - /** * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3efc43f3f162..de7f55682088 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1096,7 +1096,6 @@ extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); void jbd2_journal_set_triggers(struct buffer_head *, struct jbd2_buffer_trigger_type *type); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); -extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); extern void jbd2_journal_invalidatepage(journal_t *, -- cgit v1.2.3 From fd47d3e1c2949838e858379aaf2bc20647be2912 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Thu, 8 Nov 2012 11:24:46 -0500 Subject: jbd2: remove VLAIS usage from JBD2 code The use of variable length arrays in structs (VLAIS) in the Linux Kernel code precludes the use of compilers which don't implement VLAIS (for instance the Clang compiler). Since ctx is always a 32-bit CRC, hard coding a size of 4 bytes accomplishes the same thing without the use of VLAIS. This is the same technique already employed in fs/ext4/ext4.h Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index de7f55682088..1be23d9fdacb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1302,15 +1302,21 @@ static inline int jbd_space_needed(journal_t *journal) extern int jbd_blocks_per_page(struct inode *inode); +/* JBD uses a CRC32 checksum */ +#define JBD_MAX_CHECKSUM_SIZE 4 + static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; + char ctx[JBD_MAX_CHECKSUM_SIZE]; } desc; int err; + BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > + JBD_MAX_CHECKSUM_SIZE); + desc.shash.tfm = journal->j_chksum_driver; desc.shash.flags = 0; *(u32 *)desc.ctx = crc; -- cgit v1.2.3 From 2be975c6d920de989ff5e4bc09ffe87e59d94662 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 3 Nov 2012 12:16:12 -0700 Subject: Input: introduce managed input devices (add devres support) There is a demand from driver's writers to use managed devices framework for their drivers. Unfortunately up to this moment input devices did not provide support for managed devices and that lead to mixing two styles of resource management which usually introduced more bugs, such as manually unregistering input device but relying in devres to free interrupt handler which (unless device is properly shut off) can cause ISR to reference already freed memory. This change introduces devm_input_allocate_device() that will allocate managed instance of input device so that driver writers who prefer using devm_* framework do not have to mix 2 styles. Reviewed-by: Henrik Rydberg Reviewed-by: Tejun Heo Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 176 ++++++++++++++++++++++++++++++++++++++++++-------- include/linux/input.h | 7 +- 2 files changed, 155 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index f1be1a77edf3..ce01332f7b3a 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1726,7 +1726,7 @@ EXPORT_SYMBOL_GPL(input_class); /** * input_allocate_device - allocate memory for new input device * - * Returns prepared struct input_dev or NULL. + * Returns prepared struct input_dev or %NULL. * * NOTE: Use input_free_device() to free devices that have not been * registered; input_unregister_device() should be used for already @@ -1753,6 +1753,70 @@ struct input_dev *input_allocate_device(void) } EXPORT_SYMBOL(input_allocate_device); +struct input_devres { + struct input_dev *input; +}; + +static int devm_input_device_match(struct device *dev, void *res, void *data) +{ + struct input_devres *devres = res; + + return devres->input == data; +} + +static void devm_input_device_release(struct device *dev, void *res) +{ + struct input_devres *devres = res; + struct input_dev *input = devres->input; + + dev_dbg(dev, "%s: dropping reference to %s\n", + __func__, dev_name(&input->dev)); + input_put_device(input); +} + +/** + * devm_input_allocate_device - allocate managed input device + * @dev: device owning the input device being created + * + * Returns prepared struct input_dev or %NULL. + * + * Managed input devices do not need to be explicitly unregistered or + * freed as it will be done automatically when owner device unbinds from + * its driver (or binding fails). Once managed input device is allocated, + * it is ready to be set up and registered in the same fashion as regular + * input device. There are no special devm_input_device_[un]register() + * variants, regular ones work with both managed and unmanaged devices. + * + * NOTE: the owner device is set up as parent of input device and users + * should not override it. + */ + +struct input_dev *devm_input_allocate_device(struct device *dev) +{ + struct input_dev *input; + struct input_devres *devres; + + devres = devres_alloc(devm_input_device_release, + sizeof(struct input_devres), GFP_KERNEL); + if (!devres) + return NULL; + + input = input_allocate_device(); + if (!input) { + devres_free(devres); + return NULL; + } + + input->dev.parent = dev; + input->devres_managed = true; + + devres->input = input; + devres_add(dev, devres); + + return input; +} +EXPORT_SYMBOL(devm_input_allocate_device); + /** * input_free_device - free memory occupied by input_dev structure * @dev: input device to free @@ -1769,8 +1833,14 @@ EXPORT_SYMBOL(input_allocate_device); */ void input_free_device(struct input_dev *dev) { - if (dev) + if (dev) { + if (dev->devres_managed) + WARN_ON(devres_destroy(dev->dev.parent, + devm_input_device_release, + devm_input_device_match, + dev)); input_put_device(dev); + } } EXPORT_SYMBOL(input_free_device); @@ -1891,6 +1961,38 @@ static void input_cleanse_bitmasks(struct input_dev *dev) INPUT_CLEANSE_BITMASK(dev, SW, sw); } +static void __input_unregister_device(struct input_dev *dev) +{ + struct input_handle *handle, *next; + + input_disconnect_device(dev); + + mutex_lock(&input_mutex); + + list_for_each_entry_safe(handle, next, &dev->h_list, d_node) + handle->handler->disconnect(handle); + WARN_ON(!list_empty(&dev->h_list)); + + del_timer_sync(&dev->timer); + list_del_init(&dev->node); + + input_wakeup_procfs_readers(); + + mutex_unlock(&input_mutex); + + device_del(&dev->dev); +} + +static void devm_input_device_unregister(struct device *dev, void *res) +{ + struct input_devres *devres = res; + struct input_dev *input = devres->input; + + dev_dbg(dev, "%s: unregistering device %s\n", + __func__, dev_name(&input->dev)); + __input_unregister_device(input); +} + /** * input_register_device - register device with input core * @dev: device to be registered @@ -1906,11 +2008,21 @@ static void input_cleanse_bitmasks(struct input_dev *dev) int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); + struct input_devres *devres = NULL; struct input_handler *handler; unsigned int packet_size; const char *path; int error; + if (dev->devres_managed) { + devres = devres_alloc(devm_input_device_unregister, + sizeof(struct input_devres), GFP_KERNEL); + if (!devres) + return -ENOMEM; + + devres->input = dev; + } + /* Every input device generates EV_SYN/SYN_REPORT events. */ __set_bit(EV_SYN, dev->evbit); @@ -1926,8 +2038,10 @@ int input_register_device(struct input_dev *dev) dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2; dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); - if (!dev->vals) - return -ENOMEM; + if (!dev->vals) { + error = -ENOMEM; + goto err_devres_free; + } /* * If delay and period are pre-set by the driver, then autorepeating @@ -1952,7 +2066,7 @@ int input_register_device(struct input_dev *dev) error = device_add(&dev->dev); if (error) - return error; + goto err_free_vals; path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); pr_info("%s as %s\n", @@ -1961,10 +2075,8 @@ int input_register_device(struct input_dev *dev) kfree(path); error = mutex_lock_interruptible(&input_mutex); - if (error) { - device_del(&dev->dev); - return error; - } + if (error) + goto err_device_del; list_add_tail(&dev->node, &input_dev_list); @@ -1975,7 +2087,21 @@ int input_register_device(struct input_dev *dev) mutex_unlock(&input_mutex); + if (dev->devres_managed) { + dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n", + __func__, dev_name(&dev->dev)); + devres_add(dev->dev.parent, devres); + } return 0; + +err_device_del: + device_del(&dev->dev); +err_free_vals: + kfree(dev->vals); + dev->vals = NULL; +err_devres_free: + devres_free(devres); + return error; } EXPORT_SYMBOL(input_register_device); @@ -1988,24 +2114,20 @@ EXPORT_SYMBOL(input_register_device); */ void input_unregister_device(struct input_dev *dev) { - struct input_handle *handle, *next; - - input_disconnect_device(dev); - - mutex_lock(&input_mutex); - - list_for_each_entry_safe(handle, next, &dev->h_list, d_node) - handle->handler->disconnect(handle); - WARN_ON(!list_empty(&dev->h_list)); - - del_timer_sync(&dev->timer); - list_del_init(&dev->node); - - input_wakeup_procfs_readers(); - - mutex_unlock(&input_mutex); - - device_unregister(&dev->dev); + if (dev->devres_managed) { + WARN_ON(devres_destroy(dev->dev.parent, + devm_input_device_unregister, + devm_input_device_match, + dev)); + __input_unregister_device(dev); + /* + * We do not do input_put_device() here because it will be done + * when 2nd devres fires up. + */ + } else { + __input_unregister_device(dev); + input_put_device(dev); + } } EXPORT_SYMBOL(input_unregister_device); diff --git a/include/linux/input.h b/include/linux/input.h index cab994ba6d91..5538cc09a4f5 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -112,6 +112,8 @@ struct input_value { * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list + * @devres_managed: indicates that devices is managed with devres framework + * and needs not be explicitly unregistered or freed. */ struct input_dev { const char *name; @@ -180,6 +182,8 @@ struct input_dev { unsigned int num_vals; unsigned int max_vals; struct input_value *vals; + + bool devres_managed; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) @@ -323,7 +327,8 @@ struct input_handle { struct list_head h_node; }; -struct input_dev *input_allocate_device(void); +struct input_dev __must_check *input_allocate_device(void); +struct input_dev __must_check *devm_input_allocate_device(struct device *); void input_free_device(struct input_dev *dev); static inline struct input_dev *input_get_device(struct input_dev *dev) -- cgit v1.2.3 From 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 26 Jul 2012 14:09:49 +0200 Subject: drbd: introduce stop-sector to online verify We now can schedule only a specific range of sectors for online verify, or interrupt a running verify without interrupting the connection. Had to bump the protocol version differently, we are now 101. Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; } Also, the return value convention for worker callbacks has changed, we returned "true/false" for "keep the connection up" in 8.3, we return 0 for success and <= for failure in 8.4. Affected: receive_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +++++++ drivers/block/drbd/drbd_nl.c | 14 +++++++++----- drivers/block/drbd/drbd_proc.c | 12 +++++++++--- drivers/block/drbd/drbd_receiver.c | 10 +++++++++- drivers/block/drbd/drbd_state.c | 17 +++++++++++++---- drivers/block/drbd/drbd_worker.c | 33 +++++++++++++++++++++++++++------ include/linux/drbd.h | 2 +- include/linux/drbd_genl.h | 1 + 8 files changed, 76 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 22adfc7189de..eddc4388a1b1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -971,6 +971,7 @@ struct drbd_conf { /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; + sector_t ov_stop_sector; /* where are we now? (sector) */ sector_t ov_position; /* Start sector of out of sync range (to merge printk reporting). */ @@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } +static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev) +{ + return mdev->tconn->agreed_pro_version >= 97 && + mdev->tconn->agreed_pro_version != 100; +} + static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { int changed = mdev->ed_uuid != val; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4afd626ca3dc..eefb56308aea 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { struct drbd_conf *mdev; enum drbd_ret_code retcode; + struct start_ov_parms parms; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) goto out; mdev = adm_ctx.mdev; + + /* resume from last known position, if possible */ + parms.ov_start_sector = mdev->ov_start_sector; + parms.ov_stop_sector = ULLONG_MAX; if (info->attrs[DRBD_NLA_START_OV_PARMS]) { - /* resume from last known position, if possible */ - struct start_ov_parms parms = - { .ov_start_sector = mdev->ov_start_sector }; int err = start_ov_parms_from_attrs(&parms, info); if (err) { retcode = ERR_MANDATORY_TAG; drbd_msg_put_info(from_attrs_err_to_txt(err)); goto out; } - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); + mdev->ov_stop_sector = parms.ov_stop_sector; + /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. */ drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index e0f0d2a6d538..56672a61eb94 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(mdev); unsigned long bit_pos; + unsigned long long stop_sector = 0; if (mdev->state.conn == C_VERIFY_S || - mdev->state.conn == C_VERIFY_T) + mdev->state.conn == C_VERIFY_T) { bit_pos = bm_bits - mdev->ov_left; - else + if (verify_can_do_stop_sector(mdev)) + stop_sector = mdev->ov_stop_sector; + } else bit_pos = mdev->bm_resync_fo; /* Total sectors may be slightly off for oddly * sized devices. So what. */ seq_printf(seq, - "\t%3d%% sector pos: %llu/%llu\n", + "\t%3d%% sector pos: %llu/%llu", (int)(bit_pos / (bm_bits/100+1)), (unsigned long long)bit_pos * BM_SECT_PER_BIT, (unsigned long long)bm_bits * BM_SECT_PER_BIT); + if (stop_sector != 0 && stop_sector != ULLONG_MAX) + seq_printf(seq, " stop sector: %llu", stop_sector); + seq_printf(seq, "\n"); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7fe6b01618d4..8fddec96dfbe 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) * already decided to close the connection again, * we must not "re-establish" it here. */ if (os.conn <= C_TEAR_DOWN) - return false; + return -ECONNRESET; /* If this is the "end of sync" confirmation, usually the peer disk * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits @@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) } } + /* explicit verify finished notification, stop sector reached. */ + if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && + peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { + ov_out_of_sync_print(mdev); + drbd_resync_finished(mdev); + return 0; + } + /* peer says his disk is inconsistent, while we think it is uptodate, * and this happens while the peer still thinks we have a sync going on, * but we think we are already done with the sync. diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 444581828d70..12f2b4fbe559 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->state_wait); wake_up(&mdev->tconn->ping_wait); - /* aborted verify run. log the last position */ + /* Aborted verify run, or we reached the stop sector. + * Log the last position, unless end-of-device. */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { + ns.conn <= C_CONNECTED) { mdev->ov_start_sector = BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); + if (mdev->ov_left) + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); } if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && @@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Verify finished, or reached stop sector. Peer did not know about + * the stop sector, and we may even have changed the stop sector during + * verify to interrupt/stop early. Send the new state. */ + if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED + && verify_can_do_stop_sector(mdev)) + drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags)) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9d7e1fb0f431..1c9c6fd332c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + bool stop_sector_reached = false; if (unlikely(cancel)) return 1; @@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) sector = mdev->ov_position; for (i = 0; i < number; i++) { - if (sector >= capacity) { + if (sector >= capacity) return 1; - } + + /* We check for "finished" only in the reply path: + * w_e_end_ov_reply(). + * We need to send at least one request out. */ + stop_sector_reached = i > 0 + && verify_can_do_stop_sector(mdev) + && sector >= mdev->ov_stop_sector; + if (stop_sector_reached) + break; size = BM_BLOCK_SIZE; @@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel) requeue: mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); - mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + if (i == 0 || !stop_sector_reached) + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } @@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; if (dt <= 0) dt = 1; + db = mdev->rs_total; + /* adjust for verify start and stop sectors, respective reached position */ + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + db -= mdev->ov_left; + dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; @@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - verify_done ? "Online verify " : "Resync", + verify_done ? "Online verify" : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -896,7 +911,9 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - if (verify_done) + + /* reset start sector, if we reached end of device */ + if (verify_done && mdev->ov_left == 0) mdev->ov_start_sector = 0; drbd_md_sync(mdev); @@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) unsigned int size = peer_req->i.size; int digest_size; int err, eq = 0; + bool stop_sector_reached = false; if (unlikely(cancel)) { drbd_free_peer_req(mdev, peer_req); @@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) if ((mdev->ov_left & 0x200) == 0x200) drbd_advance_rs_marks(mdev, mdev->ov_left); - if (mdev->ov_left == 0) { + stop_sector_reached = verify_can_do_stop_sector(mdev) && + (sector + (size>>9)) >= mdev->ov_stop_sector; + + if (mdev->ov_left == 0 || stop_sector_reached) { ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 36ae7dd28d90..5171c3530886 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -55,7 +55,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 100 +#define PRO_VERSION_MAX 101 enum drbd_io_error_p { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 92ec4b50a885..9430e9ab37a8 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -215,6 +215,7 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) + __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector) ) GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, -- cgit v1.2.3 From 3174f8c5045ad247563434c4b4897bd89313eafc Mon Sep 17 00:00:00 2001 From: Philipp Marek Date: Sat, 3 Mar 2012 21:04:30 +0100 Subject: drbd: pass some more information to userspace. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 11 ++++++++++- include/linux/drbd_genl.h | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eefb56308aea..466d6b1d9309 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2666,7 +2666,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || nla_put_u32(skb, T_current_state, mdev->state.i) || nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) || - nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev))) + nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) || + nla_put_u64(skb, T_send_cnt, mdev->send_cnt) || + nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) || + nla_put_u64(skb, T_read_cnt, mdev->read_cnt) || + nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) || + nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) || + nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) || + nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) || + nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) || + nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt))) goto nla_put_failure; if (got_ldev) { diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 9430e9ab37a8..d0d8fac8a6e4 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -211,6 +211,16 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, /* for pre and post notifications of helper execution */ __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) + + __u64_field(15, 0, send_cnt) + __u64_field(16, 0, recv_cnt) + __u64_field(17, 0, read_cnt) + __u64_field(18, 0, writ_cnt) + __u64_field(19, 0, al_writ_cnt) + __u64_field(20, 0, bm_writ_cnt) + __u32_field(21, 0, ap_bio_cnt) + __u32_field(22, 0, ap_pending_cnt) + __u32_field(23, 0, rs_pending_cnt) ) GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, -- cgit v1.2.3 From eb12010e9af119c84e6b2214064a98681027e0e3 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 1 Aug 2012 12:46:20 +0200 Subject: drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/ If for some reason (typically "split-brained" cluster manager) drbd replica data has diverged, we can chose a victim, and reconnect using "--discard-my-data", causing the victim to become sync-target, fetching all changed blocks from the peer. If we are Primary, we are potentially in use, and we refuse to "roll back" changes to the data below the page cache and other users. Rename the error symbol for this to ERR_DISCARD_IMPOSSIBLE. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 35bb572a2076..d1073705bf1f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1829,7 +1829,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n return ERR_STONITH_AND_PROT_A; } if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) - return ERR_DISCARD; + return ERR_DISCARD_IMPOSSIBLE; } if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5171c3530886..0b93e5e2e064 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -136,7 +136,7 @@ enum drbd_ret_code { ERR_AUTH_ALG = 120, ERR_AUTH_ALG_ND = 121, ERR_NOMEM = 122, - ERR_DISCARD = 123, + ERR_DISCARD_IMPOSSIBLE = 123, ERR_DISK_CONFIGURED = 124, ERR_NET_CONFIGURED = 125, ERR_MANDATORY_TAG = 126, -- cgit v1.2.3 From 328e0f125bf41f4f33f684db22015f92cb44fe56 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 19 Oct 2012 14:37:47 +0200 Subject: drbd: Broadcast sync progress no more often than once per second Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 6 ++++++ drivers/block/drbd/drbd_worker.c | 4 ++++ include/linux/drbd.h | 4 ++-- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 057ffed6eb7e..784f4eb2ed61 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -965,6 +965,7 @@ struct drbd_conf { unsigned long rs_mark_time[DRBD_SYNC_MARKS]; /* current index into rs_mark_{left,time} */ int rs_last_mark; + unsigned long rs_last_bcast; /* [unit jiffies] */ /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 298dd3e35e02..d339a2754a85 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3295,6 +3295,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) unsigned seq; int err = -ENOMEM; + if (sib->sib_reason == SIB_SYNC_PROGRESS && + time_after(jiffies, mdev->rs_last_bcast + HZ)) + mdev->rs_last_bcast = jiffies; + else + return; + seq = atomic_inc_return(&drbd_genl_seq); msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); if (!msg) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 64a7305c678a..424dc7bdf9b7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1696,6 +1696,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) write_unlock_irq(&global_state_lock); if (r == SS_SUCCESS) { + /* reset rs_last_bcast when a resync or verify is started, + * to deal with potential jiffies wrap. */ + mdev->rs_last_bcast = jiffies - HZ; + dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", drbd_conn_str(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b93e5e2e064..0c5a18ec322c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,8 +52,8 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.11" -#define API_VERSION 88 +#define REL_VERSION "8.4.2" +#define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3 From 800963fd598e2acbcd3a21a17e3ab3c185ad0d6a Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sat, 10 Nov 2012 00:32:36 -0800 Subject: Input: document new members of struct input_dev Fixes kernel-doc warnings for the members added in 3.7-rc1. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 5538cc09a4f5..82ce323b9986 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -112,6 +112,9 @@ struct input_value { * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list + * @num_vals: number of values queued in the current frame + * @max_vals: maximum number of values queued in a frame + * @vals: array of values queued in the current frame * @devres_managed: indicates that devices is managed with devres framework * and needs not be explicitly unregistered or freed. */ -- cgit v1.2.3 From 76f93992e4c44f30be797d5c99d6f369ed001747 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 5 Nov 2012 16:10:31 +0100 Subject: mfd: Provide the STMPE driver with its own IRQ domain The STMPE driver is yet another IRQ controller which requires its own IRQ domain. So, we provide it with one. Acked-by: Arnd Bergmann Acked-by: Linus Walleij Signed-off-by: Lee Jones Signed-off-by: Samuel Ortiz --- drivers/mfd/stmpe.c | 82 +++++++++++++++++++++++++++++------------------ include/linux/mfd/stmpe.h | 2 ++ 2 files changed, 52 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index ad13cb00a749..5c8d8f260df2 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -757,7 +758,9 @@ static irqreturn_t stmpe_irq(int irq, void *data) int i; if (variant->id_val == STMPE801_ID) { - handle_nested_irq(stmpe->irq_base); + int base = irq_create_mapping(stmpe->domain, 0); + + handle_nested_irq(base); return IRQ_HANDLED; } @@ -778,8 +781,9 @@ static irqreturn_t stmpe_irq(int irq, void *data) while (status) { int bit = __ffs(status); int line = bank * 8 + bit; + int nestedirq = irq_create_mapping(stmpe->domain, line); - handle_nested_irq(stmpe->irq_base + line); + handle_nested_irq(nestedirq); status &= ~(1 << bit); } @@ -820,7 +824,7 @@ static void stmpe_irq_sync_unlock(struct irq_data *data) static void stmpe_irq_mask(struct irq_data *data) { struct stmpe *stmpe = irq_data_get_irq_chip_data(data); - int offset = data->irq - stmpe->irq_base; + int offset = data->hwirq; int regoffset = offset / 8; int mask = 1 << (offset % 8); @@ -830,7 +834,7 @@ static void stmpe_irq_mask(struct irq_data *data) static void stmpe_irq_unmask(struct irq_data *data) { struct stmpe *stmpe = irq_data_get_irq_chip_data(data); - int offset = data->irq - stmpe->irq_base; + int offset = data->hwirq; int regoffset = offset / 8; int mask = 1 << (offset % 8); @@ -845,43 +849,62 @@ static struct irq_chip stmpe_irq_chip = { .irq_unmask = stmpe_irq_unmask, }; -static int __devinit stmpe_irq_init(struct stmpe *stmpe) +static int stmpe_irq_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hwirq) { + struct stmpe *stmpe = d->host_data; struct irq_chip *chip = NULL; - int num_irqs = stmpe->variant->num_irqs; - int base = stmpe->irq_base; - int irq; if (stmpe->variant->id_val != STMPE801_ID) chip = &stmpe_irq_chip; - for (irq = base; irq < base + num_irqs; irq++) { - irq_set_chip_data(irq, stmpe); - irq_set_chip_and_handler(irq, chip, handle_edge_irq); - irq_set_nested_thread(irq, 1); + irq_set_chip_data(virq, stmpe); + irq_set_chip_and_handler(virq, chip, handle_edge_irq); + irq_set_nested_thread(virq, 1); #ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID); + set_irq_flags(virq, IRQF_VALID); #else - irq_set_noprobe(irq); + irq_set_noprobe(virq); #endif - } return 0; } -static void stmpe_irq_remove(struct stmpe *stmpe) +static void stmpe_irq_unmap(struct irq_domain *d, unsigned int virq) { - int num_irqs = stmpe->variant->num_irqs; - int base = stmpe->irq_base; - int irq; - - for (irq = base; irq < base + num_irqs; irq++) { #ifdef CONFIG_ARM - set_irq_flags(irq, 0); + set_irq_flags(virq, 0); #endif - irq_set_chip_and_handler(irq, NULL, NULL); - irq_set_chip_data(irq, NULL); + irq_set_chip_and_handler(virq, NULL, NULL); + irq_set_chip_data(virq, NULL); +} + +static struct irq_domain_ops stmpe_irq_ops = { + .map = stmpe_irq_map, + .unmap = stmpe_irq_unmap, + .xlate = irq_domain_xlate_twocell, +}; + +static int __devinit stmpe_irq_init(struct stmpe *stmpe) +{ + int base = stmpe->irq_base; + int num_irqs = stmpe->variant->num_irqs; + + if (base) { + stmpe->domain = irq_domain_add_legacy( + NULL, num_irqs, base, 0, &stmpe_irq_ops, stmpe); + } + else { + stmpe->domain = irq_domain_add_linear( + NULL, num_irqs, &stmpe_irq_ops, stmpe); + } + + if (!stmpe->domain) { + dev_err(stmpe->dev, "Failed to create irqdomain\n"); + return -ENOSYS; } + + return 0; } static int __devinit stmpe_chip_init(struct stmpe *stmpe) @@ -954,7 +977,7 @@ static int __devinit stmpe_add_device(struct stmpe *stmpe, struct mfd_cell *cell) { return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1, - NULL, stmpe->irq_base, NULL); + NULL, stmpe->irq_base, stmpe->domain); } static int __devinit stmpe_devices_init(struct stmpe *stmpe) @@ -1067,7 +1090,7 @@ int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum) if (ret) { dev_err(stmpe->dev, "failed to request IRQ: %d\n", ret); - goto out_removeirq; + goto free_gpio; } } @@ -1083,9 +1106,6 @@ out_removedevs: mfd_remove_devices(stmpe->dev); if (stmpe->irq >= 0) free_irq(stmpe->irq, stmpe); -out_removeirq: - if (stmpe->irq >= 0) - stmpe_irq_remove(stmpe); free_gpio: if (pdata->irq_over_gpio) gpio_free(pdata->irq_gpio); @@ -1098,10 +1118,8 @@ int stmpe_remove(struct stmpe *stmpe) { mfd_remove_devices(stmpe->dev); - if (stmpe->irq >= 0) { + if (stmpe->irq >= 0) free_irq(stmpe->irq, stmpe); - stmpe_irq_remove(stmpe); - } if (stmpe->pdata->irq_over_gpio) gpio_free(stmpe->pdata->irq_gpio); diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index f8d5b4d5843f..15dac790365f 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -62,6 +62,7 @@ struct stmpe_client_info; * @lock: lock protecting I/O operations * @irq_lock: IRQ bus lock * @dev: device, mostly for dev_dbg() + * @irq_domain: IRQ domain * @client: client - i2c or spi * @ci: client specific information * @partnum: part number @@ -79,6 +80,7 @@ struct stmpe { struct mutex lock; struct mutex irq_lock; struct device *dev; + struct irq_domain *domain; void *client; struct stmpe_client_info *ci; enum stmpe_partnum partnum; -- cgit v1.2.3 From 90a38d999739f35f4fc925c875e6ee518546b66c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 15 Oct 2012 22:44:45 +0200 Subject: mfd: Remove Unicode Byte Order Marks from da9055 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older gcc (< 4.4) doesn't like files starting with Unicode BOMs: include/linux/mfd/da9055/core.h:1: error: stray ‘\357’ in program include/linux/mfd/da9055/core.h:1: error: stray ‘\273’ in program include/linux/mfd/da9055/core.h:1: error: stray ‘\277’ in program include/linux/mfd/da9055/pdata.h:1: error: stray ‘\357’ in program include/linux/mfd/da9055/pdata.h:1: error: stray ‘\273’ in program include/linux/mfd/da9055/pdata.h:1: error: stray ‘\277’ in program include/linux/mfd/da9055/reg.h:1: error: stray ‘\357’ in program include/linux/mfd/da9055/reg.h:1: error: stray ‘\273’ in program include/linux/mfd/da9055/reg.h:1: error: stray ‘\277’ in program Remove the BOMs, the rest of the files is plain ASCII anyway. Output of "file" before: include/linux/mfd/da9055/core.h: UTF-8 Unicode (with BOM) C program text include/linux/mfd/da9055/pdata.h: UTF-8 Unicode (with BOM) C program text include/linux/mfd/da9055/reg.h: UTF-8 Unicode (with BOM) C program text Output of "file" after: include/linux/mfd/da9055/core.h: ASCII C program text include/linux/mfd/da9055/pdata.h: ASCII C program text include/linux/mfd/da9055/reg.h: ASCII C program text Signed-off-by: Geert Uytterhoeven Signed-off-by: Samuel Ortiz --- include/linux/mfd/da9055/core.h | 2 +- include/linux/mfd/da9055/pdata.h | 2 +- include/linux/mfd/da9055/reg.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h index c96ad682c59e..956afa445998 100644 --- a/include/linux/mfd/da9055/core.h +++ b/include/linux/mfd/da9055/core.h @@ -1,4 +1,4 @@ -/* +/* * da9055 declarations for DA9055 PMICs. * * Copyright(c) 2012 Dialog Semiconductor Ltd. diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index 147293b4471d..b9b204edb4e9 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012 Dialog Semiconductor Ltd. +/* Copyright (C) 2012 Dialog Semiconductor Ltd. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/linux/mfd/da9055/reg.h b/include/linux/mfd/da9055/reg.h index df237ee54803..2b592e072dbf 100644 --- a/include/linux/mfd/da9055/reg.h +++ b/include/linux/mfd/da9055/reg.h @@ -1,4 +1,4 @@ -/* +/* * DA9055 declarations for DA9055 PMICs. * * Copyright(c) 2012 Dialog Semiconductor Ltd. -- cgit v1.2.3 From 89d16665d388837b30972081d97b814be26d68a2 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 9 Nov 2012 21:02:56 +0000 Subject: efivarfs: Use query_variable_info() to limit kmalloc() We don't want someone who can write EFI variables to be able to allocate arbitrarily large amounts of memory, so cap it to something sensible like the amount of free space for EFI variables. Acked-by: Jeremy Kerr Cc: Matthew Garrett Cc: Alan Cox Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 43 ++++++++++++++++++++++++++++++++++--------- include/linux/efi.h | 1 + 2 files changed, 35 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 9ac934018bba..d6b8d2f628fa 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -694,28 +694,51 @@ static ssize_t efivarfs_file_write(struct file *file, struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); unsigned long newdatasize; + u64 storage_size, remaining_size, max_size; ssize_t bytes = 0; if (count < sizeof(attributes)) return -EINVAL; - data = kmalloc(datasize, GFP_KERNEL); + if (copy_from_user(&attributes, userbuf, sizeof(attributes))) + return -EFAULT; - if (!data) - return -ENOMEM; + if (attributes & ~(EFI_VARIABLE_MASK)) + return -EINVAL; efivars = var->efivars; - if (copy_from_user(&attributes, userbuf, sizeof(attributes))) { - bytes = -EFAULT; - goto out; + /* + * Ensure that the user can't allocate arbitrarily large + * amounts of memory. Pick a default size of 64K if + * QueryVariableInfo() isn't supported by the firmware. + */ + spin_lock(&efivars->lock); + + if (!efivars->ops->query_variable_info) + status = EFI_UNSUPPORTED; + else { + const struct efivar_operations *fops = efivars->ops; + status = fops->query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); } - if (attributes & ~(EFI_VARIABLE_MASK)) { - bytes = -EINVAL; - goto out; + spin_unlock(&efivars->lock); + + if (status != EFI_SUCCESS) { + if (status != EFI_UNSUPPORTED) + return efi_status_to_err(status); + + remaining_size = 65536; } + if (datasize > remaining_size) + return -ENOSPC; + + data = kmalloc(datasize, GFP_KERNEL); + if (!data) + return -ENOMEM; + if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { bytes = -EFAULT; goto out; @@ -1709,6 +1732,8 @@ efivars_init(void) ops.get_variable = efi.get_variable; ops.set_variable = efi.set_variable; ops.get_next_variable = efi.get_next_variable; + ops.query_variable_info = efi.query_variable_info; + error = register_efivars(&__efivars, &ops, efi_kobj); if (error) goto err_put; diff --git a/include/linux/efi.h b/include/linux/efi.h index 5e2308d9c6be..f80079cd84f4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -646,6 +646,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; + efi_query_variable_info_t *query_variable_info; }; struct efivars { -- cgit v1.2.3 From ab7edb149c7548541ee588b8372c2041b6f1cbc8 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 11 Oct 2012 13:55:32 +0200 Subject: mfd: twl6040: Convert to use regmap_irq With regmap_irq it is possible to remove the twl6040-irq.c file and simplify the code. Signed-off-by: Peter Ujfalusi Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/Makefile | 2 +- drivers/mfd/twl6040-core.c | 55 ++++++++---- drivers/mfd/twl6040-irq.c | 205 -------------------------------------------- include/linux/mfd/twl6040.h | 10 +-- 5 files changed, 48 insertions(+), 228 deletions(-) delete mode 100644 drivers/mfd/twl6040-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 3f2187ae8c5d..34242cada125 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -321,10 +321,10 @@ config MFD_TWL4030_AUDIO config TWL6040_CORE bool "Support for TWL6040 audio codec" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C - select IRQ_DOMAIN + select REGMAP_IRQ default n help Say yes here if you want support for Texas Instruments TWL6040 audio diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index a4093a44304a..05bebf66ccd2 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_TWL4030_CORE) += twl-core.o twl4030-irq.o twl6030-irq.o obj-$(CONFIG_TWL4030_MADC) += twl4030-madc.o obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o obj-$(CONFIG_MFD_TWL4030_AUDIO) += twl4030-audio.o -obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o twl6040-irq.o +obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o obj-$(CONFIG_MFD_MC13XXX_SPI) += mc13xxx-spi.o diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c index 5817bc6d09dc..e5f7b795afff 100644 --- a/drivers/mfd/twl6040-core.c +++ b/drivers/mfd/twl6040-core.c @@ -499,6 +499,25 @@ static struct regmap_config twl6040_regmap_config = { .readable_reg = twl6040_readable_reg, }; +static const struct regmap_irq twl6040_irqs[] = { + { .reg_offset = 0, .mask = TWL6040_THINT, }, + { .reg_offset = 0, .mask = TWL6040_PLUGINT | TWL6040_UNPLUGINT, }, + { .reg_offset = 0, .mask = TWL6040_HOOKINT, }, + { .reg_offset = 0, .mask = TWL6040_HFINT, }, + { .reg_offset = 0, .mask = TWL6040_VIBINT, }, + { .reg_offset = 0, .mask = TWL6040_READYINT, }, +}; + +static struct regmap_irq_chip twl6040_irq_chip = { + .name = "twl6040", + .irqs = twl6040_irqs, + .num_irqs = ARRAY_SIZE(twl6040_irqs), + + .num_regs = 1, + .status_base = TWL6040_REG_INTID, + .mask_base = TWL6040_REG_INTMR, +}; + static int __devinit twl6040_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -574,21 +593,27 @@ static int __devinit twl6040_probe(struct i2c_client *client, goto gpio_err; } - /* codec interrupt */ - ret = twl6040_irq_init(twl6040); - if (ret) + ret = regmap_add_irq_chip(twl6040->regmap, twl6040->irq, + IRQF_ONESHOT, 0, &twl6040_irq_chip, + &twl6040->irq_data); + if (ret < 0) goto irq_init_err; - ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY, - NULL, twl6040_readyint_handler, IRQF_ONESHOT, + twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data, + TWL6040_IRQ_READY); + twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data, + TWL6040_IRQ_TH); + + ret = request_threaded_irq(twl6040->irq_ready, NULL, + twl6040_readyint_handler, IRQF_ONESHOT, "twl6040_irq_ready", twl6040); if (ret) { dev_err(twl6040->dev, "READY IRQ request failed: %d\n", ret); goto readyirq_err; } - ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_TH, - NULL, twl6040_thint_handler, IRQF_ONESHOT, + ret = request_threaded_irq(twl6040->irq_th, NULL, + twl6040_thint_handler, IRQF_ONESHOT, "twl6040_irq_th", twl6040); if (ret) { dev_err(twl6040->dev, "Thermal IRQ request failed: %d\n", ret); @@ -604,7 +629,7 @@ static int __devinit twl6040_probe(struct i2c_client *client, * The ASoC codec can work without pdata, pass the platform_data only if * it has been provided. */ - irq = twl6040->irq_base + TWL6040_IRQ_PLUG; + irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_PLUG); cell = &twl6040->cells[children]; cell->name = "twl6040-codec"; twl6040_codec_rsrc[0].start = irq; @@ -618,7 +643,7 @@ static int __devinit twl6040_probe(struct i2c_client *client, children++; if (twl6040_has_vibra(pdata, node)) { - irq = twl6040->irq_base + TWL6040_IRQ_VIB; + irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_VIB); cell = &twl6040->cells[children]; cell->name = "twl6040-vibra"; @@ -657,11 +682,11 @@ static int __devinit twl6040_probe(struct i2c_client *client, return 0; mfd_err: - free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040); + free_irq(twl6040->irq_th, twl6040); thirq_err: - free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040); + free_irq(twl6040->irq_ready, twl6040); readyirq_err: - twl6040_irq_exit(twl6040); + regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); irq_init_err: if (gpio_is_valid(twl6040->audpwron)) gpio_free(twl6040->audpwron); @@ -685,9 +710,9 @@ static int __devexit twl6040_remove(struct i2c_client *client) if (gpio_is_valid(twl6040->audpwron)) gpio_free(twl6040->audpwron); - free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040); - free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040); - twl6040_irq_exit(twl6040); + free_irq(twl6040->irq_ready, twl6040); + free_irq(twl6040->irq_th, twl6040); + regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); mfd_remove_devices(&client->dev); i2c_set_clientdata(client, NULL); diff --git a/drivers/mfd/twl6040-irq.c b/drivers/mfd/twl6040-irq.c deleted file mode 100644 index 4b42543da228..000000000000 --- a/drivers/mfd/twl6040-irq.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Interrupt controller support for TWL6040 - * - * Author: Misael Lopez Cruz - * - * Copyright: (C) 2011 Texas Instruments, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct twl6040_irq_data { - int mask; - int status; -}; - -static struct twl6040_irq_data twl6040_irqs[] = { - { - .mask = TWL6040_THMSK, - .status = TWL6040_THINT, - }, - { - .mask = TWL6040_PLUGMSK, - .status = TWL6040_PLUGINT | TWL6040_UNPLUGINT, - }, - { - .mask = TWL6040_HOOKMSK, - .status = TWL6040_HOOKINT, - }, - { - .mask = TWL6040_HFMSK, - .status = TWL6040_HFINT, - }, - { - .mask = TWL6040_VIBMSK, - .status = TWL6040_VIBINT, - }, - { - .mask = TWL6040_READYMSK, - .status = TWL6040_READYINT, - }, -}; - -static inline -struct twl6040_irq_data *irq_to_twl6040_irq(struct twl6040 *twl6040, - int irq) -{ - return &twl6040_irqs[irq - twl6040->irq_base]; -} - -static void twl6040_irq_lock(struct irq_data *data) -{ - struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data); - - mutex_lock(&twl6040->irq_mutex); -} - -static void twl6040_irq_sync_unlock(struct irq_data *data) -{ - struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data); - - /* write back to hardware any change in irq mask */ - if (twl6040->irq_masks_cur != twl6040->irq_masks_cache) { - twl6040->irq_masks_cache = twl6040->irq_masks_cur; - twl6040_reg_write(twl6040, TWL6040_REG_INTMR, - twl6040->irq_masks_cur); - } - - mutex_unlock(&twl6040->irq_mutex); -} - -static void twl6040_irq_enable(struct irq_data *data) -{ - struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data); - struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040, - data->irq); - - twl6040->irq_masks_cur &= ~irq_data->mask; -} - -static void twl6040_irq_disable(struct irq_data *data) -{ - struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data); - struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040, - data->irq); - - twl6040->irq_masks_cur |= irq_data->mask; -} - -static struct irq_chip twl6040_irq_chip = { - .name = "twl6040", - .irq_bus_lock = twl6040_irq_lock, - .irq_bus_sync_unlock = twl6040_irq_sync_unlock, - .irq_enable = twl6040_irq_enable, - .irq_disable = twl6040_irq_disable, -}; - -static irqreturn_t twl6040_irq_thread(int irq, void *data) -{ - struct twl6040 *twl6040 = data; - u8 intid; - int i; - - intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID); - - /* apply masking and report (backwards to handle READYINT first) */ - for (i = ARRAY_SIZE(twl6040_irqs) - 1; i >= 0; i--) { - if (twl6040->irq_masks_cur & twl6040_irqs[i].mask) - intid &= ~twl6040_irqs[i].status; - if (intid & twl6040_irqs[i].status) - handle_nested_irq(twl6040->irq_base + i); - } - - /* ack unmasked irqs */ - twl6040_reg_write(twl6040, TWL6040_REG_INTID, intid); - - return IRQ_HANDLED; -} - -int twl6040_irq_init(struct twl6040 *twl6040) -{ - struct device_node *node = twl6040->dev->of_node; - int i, nr_irqs, irq_base, ret; - u8 val; - - mutex_init(&twl6040->irq_mutex); - - /* mask the individual interrupt sources */ - twl6040->irq_masks_cur = TWL6040_ALLINT_MSK; - twl6040->irq_masks_cache = TWL6040_ALLINT_MSK; - twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK); - - nr_irqs = ARRAY_SIZE(twl6040_irqs); - - irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0); - if (IS_ERR_VALUE(irq_base)) { - dev_err(twl6040->dev, "Fail to allocate IRQ descs\n"); - return irq_base; - } - twl6040->irq_base = irq_base; - - irq_domain_add_legacy(node, ARRAY_SIZE(twl6040_irqs), irq_base, 0, - &irq_domain_simple_ops, NULL); - - /* Register them with genirq */ - for (i = irq_base; i < irq_base + nr_irqs; i++) { - irq_set_chip_data(i, twl6040); - irq_set_chip_and_handler(i, &twl6040_irq_chip, - handle_level_irq); - irq_set_nested_thread(i, 1); - - /* ARM needs us to explicitly flag the IRQ as valid - * and will set them noprobe when we do so. */ -#ifdef CONFIG_ARM - set_irq_flags(i, IRQF_VALID); -#else - irq_set_noprobe(i); -#endif - } - - ret = request_threaded_irq(twl6040->irq, NULL, twl6040_irq_thread, - IRQF_ONESHOT, "twl6040", twl6040); - if (ret) { - dev_err(twl6040->dev, "failed to request IRQ %d: %d\n", - twl6040->irq, ret); - return ret; - } - - /* reset interrupts */ - val = twl6040_reg_read(twl6040, TWL6040_REG_INTID); - - /* interrupts cleared on write */ - twl6040_clear_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_INTCLRMODE); - - return 0; -} -EXPORT_SYMBOL(twl6040_irq_init); - -void twl6040_irq_exit(struct twl6040 *twl6040) -{ - free_irq(twl6040->irq, twl6040); -} -EXPORT_SYMBOL(twl6040_irq_exit); diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h index a8eff4ad9be5..94ac944d12f0 100644 --- a/include/linux/mfd/twl6040.h +++ b/include/linux/mfd/twl6040.h @@ -207,10 +207,12 @@ struct twl6040_platform_data { }; struct regmap; +struct regmap_irq_chips_data; struct twl6040 { struct device *dev; struct regmap *regmap; + struct regmap_irq_chip_data *irq_data; struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */ struct mutex mutex; struct mutex irq_mutex; @@ -228,9 +230,8 @@ struct twl6040 { unsigned int mclk; unsigned int irq; - unsigned int irq_base; - u8 irq_masks_cur; - u8 irq_masks_cache; + unsigned int irq_ready; + unsigned int irq_th; }; int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg); @@ -245,8 +246,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id, unsigned int freq_in, unsigned int freq_out); int twl6040_get_pll(struct twl6040 *twl6040); unsigned int twl6040_get_sysclk(struct twl6040 *twl6040); -int twl6040_irq_init(struct twl6040 *twl6040); -void twl6040_irq_exit(struct twl6040 *twl6040); + /* Get the combined status of the vibra control register */ int twl6040_get_vibralr_status(struct twl6040 *twl6040); -- cgit v1.2.3 From 605511a848ae3ac4b2ce272ae6cbf8930b29ebb3 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 13 Nov 2012 19:18:05 +0530 Subject: mfd: Convert tps6586x to irq_domain Allocate the irq base if it base is not porvided i.e. in case of device tree invocation of this driver. Convert the tps6586x driver to irq domain, using a legacy IRQ mapping if an irq_base is specified in platform data or dynamically allocated and otherwise using a linear mapping. Signed-off-by: Laxman Dewangan Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/tps6586x.c | 91 ++++++++++++++++++++++++++++++++------------ include/linux/mfd/tps6586x.h | 1 + 2 files changed, 67 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 467464368773..2cdf1e6c00c8 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -17,12 +17,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -116,6 +118,7 @@ struct tps6586x { int irq_base; u32 irq_en; u8 mask_reg[5]; + struct irq_domain *irq_domain; }; static inline struct tps6586x *dev_to_tps6586x(struct device *dev) @@ -184,6 +187,14 @@ int tps6586x_update(struct device *dev, int reg, uint8_t val, uint8_t mask) } EXPORT_SYMBOL_GPL(tps6586x_update); +int tps6586x_irq_get_virq(struct device *dev, int irq) +{ + struct tps6586x *tps6586x = dev_to_tps6586x(dev); + + return irq_create_mapping(tps6586x->irq_domain, irq); +} +EXPORT_SYMBOL_GPL(tps6586x_irq_get_virq); + static int __remove_subdev(struct device *dev, void *unused) { platform_device_unregister(to_platform_device(dev)); @@ -205,7 +216,7 @@ static void tps6586x_irq_lock(struct irq_data *data) static void tps6586x_irq_enable(struct irq_data *irq_data) { struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data); - unsigned int __irq = irq_data->irq - tps6586x->irq_base; + unsigned int __irq = irq_data->hwirq; const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask; @@ -216,7 +227,7 @@ static void tps6586x_irq_disable(struct irq_data *irq_data) { struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data); - unsigned int __irq = irq_data->irq - tps6586x->irq_base; + unsigned int __irq = irq_data->hwirq; const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; tps6586x->mask_reg[data->mask_reg] |= data->mask_mask; @@ -239,6 +250,39 @@ static void tps6586x_irq_sync_unlock(struct irq_data *data) mutex_unlock(&tps6586x->irq_lock); } +static struct irq_chip tps6586x_irq_chip = { + .name = "tps6586x", + .irq_bus_lock = tps6586x_irq_lock, + .irq_bus_sync_unlock = tps6586x_irq_sync_unlock, + .irq_disable = tps6586x_irq_disable, + .irq_enable = tps6586x_irq_enable, +}; + +static int tps6586x_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct tps6586x *tps6586x = h->host_data; + + irq_set_chip_data(virq, tps6586x); + irq_set_chip_and_handler(virq, &tps6586x_irq_chip, handle_simple_irq); + irq_set_nested_thread(virq, 1); + + /* ARM needs us to explicitly flag the IRQ as valid + * and will set them noprobe when we do so. */ +#ifdef CONFIG_ARM + set_irq_flags(virq, IRQF_VALID); +#else + irq_set_noprobe(virq); +#endif + + return 0; +} + +static struct irq_domain_ops tps6586x_domain_ops = { + .map = tps6586x_irq_map, + .xlate = irq_domain_xlate_twocell, +}; + static irqreturn_t tps6586x_irq(int irq, void *data) { struct tps6586x *tps6586x = data; @@ -259,7 +303,8 @@ static irqreturn_t tps6586x_irq(int irq, void *data) int i = __ffs(acks); if (tps6586x->irq_en & (1 << i)) - handle_nested_irq(tps6586x->irq_base + i); + handle_nested_irq( + irq_find_mapping(tps6586x->irq_domain, i)); acks &= ~(1 << i); } @@ -272,11 +317,8 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq, { int i, ret; u8 tmp[4]; - - if (!irq_base) { - dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n"); - return -EINVAL; - } + int new_irq_base; + int irq_num = ARRAY_SIZE(tps6586x_irqs); mutex_init(&tps6586x->irq_lock); for (i = 0; i < 5; i++) { @@ -286,25 +328,24 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq, tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, sizeof(tmp), tmp); - tps6586x->irq_base = irq_base; - - tps6586x->irq_chip.name = "tps6586x"; - tps6586x->irq_chip.irq_enable = tps6586x_irq_enable; - tps6586x->irq_chip.irq_disable = tps6586x_irq_disable; - tps6586x->irq_chip.irq_bus_lock = tps6586x_irq_lock; - tps6586x->irq_chip.irq_bus_sync_unlock = tps6586x_irq_sync_unlock; - - for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) { - int __irq = i + tps6586x->irq_base; - irq_set_chip_data(__irq, tps6586x); - irq_set_chip_and_handler(__irq, &tps6586x->irq_chip, - handle_simple_irq); - irq_set_nested_thread(__irq, 1); -#ifdef CONFIG_ARM - set_irq_flags(__irq, IRQF_VALID); -#endif + if (irq_base > 0) { + new_irq_base = irq_alloc_descs(irq_base, 0, irq_num, -1); + if (new_irq_base < 0) { + dev_err(tps6586x->dev, + "Failed to alloc IRQs: %d\n", new_irq_base); + return new_irq_base; + } + } else { + new_irq_base = 0; } + tps6586x->irq_domain = irq_domain_add_simple(tps6586x->dev->of_node, + irq_num, new_irq_base, &tps6586x_domain_ops, + tps6586x); + if (!tps6586x->irq_domain) { + dev_err(tps6586x->dev, "Failed to create IRQ domain\n"); + return -ENOMEM; + } ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT, "tps6586x", tps6586x); diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h index 2dd123194958..ebd8b08a386b 100644 --- a/include/linux/mfd/tps6586x.h +++ b/include/linux/mfd/tps6586x.h @@ -93,5 +93,6 @@ extern int tps6586x_set_bits(struct device *dev, int reg, uint8_t bit_mask); extern int tps6586x_clr_bits(struct device *dev, int reg, uint8_t bit_mask); extern int tps6586x_update(struct device *dev, int reg, uint8_t val, uint8_t mask); +extern int tps6586x_irq_get_virq(struct device *dev, int irq); #endif /*__LINUX_MFD_TPS6586X_H */ -- cgit v1.2.3 From 2cd451792db174382aaca96c75bc3bf47a6065fe Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 14 Nov 2012 11:14:26 +0800 Subject: spi/omap: fix D0/D1 direction confusion 0384e90b8 ("spi/mcspi: allow configuration of pin directions") did what it claimed to do the wrong way around. D0/D1 is configured as output by *clearing* the bits in the conf registers, hence also breaking the former default behaviour. Fix this before that change is merged to mainline. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/omap-spi.txt | 6 +++--- drivers/spi/spi-omap2-mcspi.c | 6 +++--- include/linux/platform_data/spi-omap2-mcspi.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt index 2ef0a6b85653..938809c6829b 100644 --- a/Documentation/devicetree/bindings/spi/omap-spi.txt +++ b/Documentation/devicetree/bindings/spi/omap-spi.txt @@ -6,9 +6,9 @@ Required properties: - "ti,omap4-spi" for OMAP4+. - ti,spi-num-cs : Number of chipselect supported by the instance. - ti,hwmods: Name of the hwmod associated to the McSPI -- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as - output. The default is D0 as output and - D1 as input. +- ti,pindir-d0-out-d1-in: Select the D0 pin as output and D1 as + input. The default is D0 as input and + D1 as output. Example: diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 51046332677c..89f73c425c3d 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -766,7 +766,7 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, /* standard 4-wire master mode: SCK, MOSI/out, MISO/in, nCS * REVISIT: this controller could support SPI_3WIRE mode. */ - if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) { + if (mcspi->pin_dir == MCSPI_PINDIR_D0_IN_D1_OUT) { l &= ~OMAP2_MCSPI_CHCONF_IS; l &= ~OMAP2_MCSPI_CHCONF_DPE1; l |= OMAP2_MCSPI_CHCONF_DPE0; @@ -1188,8 +1188,8 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev) of_property_read_u32(node, "ti,spi-num-cs", &num_cs); master->num_chipselect = num_cs; master->bus_num = bus_num++; - if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL)) - mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT; + if (of_get_property(node, "ti,pindir-d0-out-d1-in", NULL)) + mcspi->pin_dir = MCSPI_PINDIR_D0_OUT_D1_IN; } else { pdata = pdev->dev.platform_data; master->num_chipselect = pdata->num_cs; diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index ce70f7b5a8e1..a65572d53211 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -7,8 +7,8 @@ #define OMAP4_MCSPI_REG_OFFSET 0x100 -#define MCSPI_PINDIR_D0_OUT_D1_IN 0 -#define MCSPI_PINDIR_D0_IN_D1_OUT 1 +#define MCSPI_PINDIR_D0_IN_D1_OUT 0 +#define MCSPI_PINDIR_D0_OUT_D1_IN 1 struct omap2_mcspi_platform_config { unsigned short num_cs; -- cgit v1.2.3 From 49839dc93970789cea46f5171cd7f6ec11af64c7 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 6 Nov 2012 16:31:32 +0000 Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints" This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc (ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints). This commit causes I2C timeouts to appear on several OMAP3430/3530-based boards: http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2 http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2 http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2 and appears to have been sent for merging before one of its prerequisites was merged: http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2 Signed-off-by: Paul Walmsley Acked-by: Jean Pihet Signed-off-by: Wolfram Sang --- arch/arm/plat-omap/i2c.c | 21 +++++++++++++++++++++ drivers/i2c/busses/i2c-omap.c | 32 ++++++++++++++------------------ include/linux/i2c-omap.h | 1 + 3 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c index a5683a84c6ee..6013831a043e 100644 --- a/arch/arm/plat-omap/i2c.c +++ b/arch/arm/plat-omap/i2c.c @@ -26,12 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #define OMAP_I2C_SIZE 0x3f @@ -127,6 +129,16 @@ static inline int omap1_i2c_add_bus(int bus_id) #ifdef CONFIG_ARCH_OMAP2PLUS +/* + * XXX This function is a temporary compatibility wrapper - only + * needed until the I2C driver can be converted to call + * omap_pm_set_max_dev_wakeup_lat() and handle a return code. + */ +static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t) +{ + omap_pm_set_max_mpu_wakeup_lat(dev, t); +} + static inline int omap2_i2c_add_bus(int bus_id) { int l; @@ -158,6 +170,15 @@ static inline int omap2_i2c_add_bus(int bus_id) dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr; pdata->flags = dev_attr->flags; + /* + * When waiting for completion of a i2c transfer, we need to + * set a wake up latency constraint for the MPU. This is to + * ensure quick enough wakeup from idle, when transfer + * completes. + * Only omap3 has support for constraints + */ + if (cpu_is_omap34xx()) + pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; pdev = omap_device_build(name, bus_id, oh, pdata, sizeof(struct omap_i2c_bus_platform_data), NULL, 0, 0); diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index db31eaed6ea5..0b0254312d21 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -43,7 +43,6 @@ #include #include #include -#include /* I2C controller revisions */ #define OMAP_I2C_OMAP1_REV_2 0x20 @@ -187,8 +186,9 @@ struct omap_i2c_dev { int reg_shift; /* bit shift for I2C register addresses */ struct completion cmd_complete; struct resource *ioarea; - u32 latency; /* maximum MPU wkup latency */ - struct pm_qos_request pm_qos_request; + u32 latency; /* maximum mpu wkup latency */ + void (*set_mpu_wkup_lat)(struct device *dev, + long latency); u32 speed; /* Speed of bus in kHz */ u32 dtrev; /* extra revision from DT */ u32 flags; @@ -494,7 +494,9 @@ static void omap_i2c_resize_fifo(struct omap_i2c_dev *dev, u8 size, bool is_rx) dev->b_hw = 1; /* Enable hardware fixes */ /* calculate wakeup latency constraint for MPU */ - dev->latency = (1000000 * dev->threshold) / (1000 * dev->speed / 8); + if (dev->set_mpu_wkup_lat != NULL) + dev->latency = (1000000 * dev->threshold) / + (1000 * dev->speed / 8); } /* @@ -629,16 +631,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) if (r < 0) goto out; - /* - * When waiting for completion of a i2c transfer, we need to - * set a wake up latency constraint for the MPU. This is to - * ensure quick enough wakeup from idle, when transfer - * completes. - */ - if (dev->latency) - pm_qos_add_request(&dev->pm_qos_request, - PM_QOS_CPU_DMA_LATENCY, - dev->latency); + if (dev->set_mpu_wkup_lat != NULL) + dev->set_mpu_wkup_lat(dev->dev, dev->latency); for (i = 0; i < num; i++) { r = omap_i2c_xfer_msg(adap, &msgs[i], (i == (num - 1))); @@ -646,8 +640,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) break; } - if (dev->latency) - pm_qos_remove_request(&dev->pm_qos_request); + if (dev->set_mpu_wkup_lat != NULL) + dev->set_mpu_wkup_lat(dev->dev, -1); if (r == 0) r = num; @@ -1104,6 +1098,7 @@ omap_i2c_probe(struct platform_device *pdev) } else if (pdata != NULL) { dev->speed = pdata->clkrate; dev->flags = pdata->flags; + dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat; dev->dtrev = pdata->rev; } @@ -1159,8 +1154,9 @@ omap_i2c_probe(struct platform_device *pdev) dev->b_hw = 1; /* Enable hardware fixes */ /* calculate wakeup latency constraint for MPU */ - dev->latency = (1000000 * dev->fifo_size) / - (1000 * dev->speed / 8); + if (dev->set_mpu_wkup_lat != NULL) + dev->latency = (1000000 * dev->fifo_size) / + (1000 * dev->speed / 8); } /* reset ASAP, clearing any IRQs */ diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index df804ba73e0b..92a0dc75bc74 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data { u32 clkrate; u32 rev; u32 flags; + void (*set_mpu_wkup_lat)(struct device *dev, long set); }; #endif -- cgit v1.2.3 From 2c88ab8c5af7d637d2a9d14b607fa6100fa64236 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Mon, 5 Nov 2012 17:53:39 +0530 Subject: ARM: i2c: omap: Remove the i207 errata flag The commit [i2c: omap: use revision check for OMAP_I2C_FLAG_APPLY_ERRATA_I207] uses the revision id instead of the flag. So the flag can be safely removed. Reviewed-by: Felipe Balbi Signed-off-by: Shubhrajyoti D Signed-off-by: Wolfram Sang --- arch/arm/mach-omap2/omap_hwmod_2430_data.c | 3 +-- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------ drivers/i2c/busses/i2c-omap.c | 3 +-- include/linux/i2c-omap.h | 1 - 4 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c index c455e41b0237..b79ccf6efbe8 100644 --- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c @@ -76,8 +76,7 @@ static struct omap_hwmod_class i2c_class = { static struct omap_i2c_dev_attr i2c_dev_attr = { .fifo_depth = 8, /* bytes */ - .flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 | - OMAP_I2C_FLAG_BUS_SHIFT_2 | + .flags = OMAP_I2C_FLAG_BUS_SHIFT_2 | OMAP_I2C_FLAG_FORCE_19200_INT_CLK, }; diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index f67b7ee07dd4..943222c40489 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = { /* I2C1 */ static struct omap_i2c_dev_attr i2c1_dev_attr = { .fifo_depth = 8, /* bytes */ - .flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | + .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | OMAP_I2C_FLAG_BUS_SHIFT_2, }; @@ -818,8 +817,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = { /* I2C2 */ static struct omap_i2c_dev_attr i2c2_dev_attr = { .fifo_depth = 8, /* bytes */ - .flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | + .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | OMAP_I2C_FLAG_BUS_SHIFT_2, }; @@ -845,8 +843,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = { /* I2C3 */ static struct omap_i2c_dev_attr i2c3_dev_attr = { .fifo_depth = 64, /* bytes */ - .flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | + .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | OMAP_I2C_FLAG_BUS_SHIFT_2, }; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 0ca50e71731b..11e645ab1e79 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1028,8 +1028,7 @@ static const struct i2c_algorithm omap_i2c_algo = { #ifdef CONFIG_OF static struct omap_i2c_bus_platform_data omap3_pdata = { .rev = OMAP_I2C_IP_VERSION_1, - .flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | + .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | OMAP_I2C_FLAG_BUS_SHIFT_2, }; diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 92a0dc75bc74..1b25c04f82d9 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -21,7 +21,6 @@ #define OMAP_I2C_FLAG_SIMPLE_CLOCK BIT(1) #define OMAP_I2C_FLAG_16BIT_DATA_REG BIT(2) #define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE BIT(3) -#define OMAP_I2C_FLAG_APPLY_ERRATA_I207 BIT(4) #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK BIT(5) #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK BIT(6) /* how the CPU address bus must be translated for I2C unit access */ -- cgit v1.2.3 From 1cf3d8b3d24cd383ddfd5442c83ec5c355ffc2f7 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 2 Oct 2012 16:57:57 +0000 Subject: powerpc+of: Add of node/property notification chain for adds and removes This patch moves the notification chain for updates to the device tree from the powerpc/pseries code to the base OF code. This makes this functionality available to all architectures. Additionally the notification chain is updated to allow notifications for property add/remove/update. To make this work a pointer to a new struct (of_prop_reconfig) is passed to the routines in the notification chain. The of_prop_reconfig property contains a pointer to the node containing the property and a pointer to the property itself. In the case of property updates, the property pointer refers to the new property. Signed-off-by: Nathan Fontenot Acked-by: Rob Herring Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pSeries_reconfig.h | 32 ----------- arch/powerpc/kernel/prom.c | 6 +- arch/powerpc/platforms/pseries/dlpar.c | 14 +++-- arch/powerpc/platforms/pseries/hotplug-cpu.c | 8 +-- arch/powerpc/platforms/pseries/hotplug-memory.c | 60 ++++++++++++++------ arch/powerpc/platforms/pseries/iommu.c | 6 +- arch/powerpc/platforms/pseries/reconfig.c | 68 ++--------------------- arch/powerpc/platforms/pseries/setup.c | 6 +- drivers/crypto/nx/nx-842.c | 20 +++---- drivers/crypto/nx/nx.c | 1 - drivers/of/base.c | 74 +++++++++++++++++++++++-- include/linux/of.h | 22 ++++++-- 12 files changed, 163 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h index c07edfe98b98..adc00d2e75b0 100644 --- a/arch/powerpc/include/asm/pSeries_reconfig.h +++ b/arch/powerpc/include/asm/pSeries_reconfig.h @@ -2,43 +2,11 @@ #define _PPC64_PSERIES_RECONFIG_H #ifdef __KERNEL__ -#include - -/* - * Use this API if your code needs to know about OF device nodes being - * added or removed on pSeries systems. - */ - -#define PSERIES_RECONFIG_ADD 0x0001 -#define PSERIES_RECONFIG_REMOVE 0x0002 -#define PSERIES_DRCONF_MEM_ADD 0x0003 -#define PSERIES_DRCONF_MEM_REMOVE 0x0004 -#define PSERIES_UPDATE_PROPERTY 0x0005 - -/** - * pSeries_reconfig_notify - Notifier value structure for OFDT property updates - * - * @node: Device tree node which owns the property being updated - * @property: Updated property - */ -struct pSeries_reconfig_prop_update { - struct device_node *node; - struct property *property; -}; - #ifdef CONFIG_PPC_PSERIES -extern int pSeries_reconfig_notifier_register(struct notifier_block *); -extern void pSeries_reconfig_notifier_unregister(struct notifier_block *); -extern int pSeries_reconfig_notify(unsigned long action, void *p); /* Not the best place to put this, will be fixed when we move some * of the rtas suspend-me stuff to pseries */ extern void pSeries_coalesce_init(void); #else /* !CONFIG_PPC_PSERIES */ -static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb) -{ - return 0; -} -static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { } static inline void pSeries_coalesce_init(void) { } #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 37725e86651e..6feb60c3c6e3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -802,7 +802,7 @@ static int prom_reconfig_notifier(struct notifier_block *nb, int err; switch (action) { - case PSERIES_RECONFIG_ADD: + case OF_RECONFIG_ATTACH_NODE: err = of_finish_dynamic_node(node); if (err < 0) printk(KERN_ERR "finish_node returned %d\n", err); @@ -821,7 +821,7 @@ static struct notifier_block prom_reconfig_nb = { static int __init prom_reconfig_setup(void) { - return pSeries_reconfig_notifier_register(&prom_reconfig_nb); + return of_reconfig_notifier_register(&prom_reconfig_nb); } __initcall(prom_reconfig_setup); #endif diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index e36789bd4e6c..a1a7b9a67ffd 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -16,13 +16,13 @@ #include #include #include +#include #include "offline_states.h" #include #include #include #include -#include struct cc_workarea { u32 drc_index; @@ -262,24 +262,26 @@ int dlpar_attach_node(struct device_node *dn) if (!dn->parent) return -ENOMEM; - rc = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, dn); + rc = of_attach_node(dn); if (rc) { printk(KERN_ERR "Failed to add device node %s\n", dn->full_name); return rc; } - of_attach_node(dn); of_node_put(dn->parent); return 0; } int dlpar_detach_node(struct device_node *dn) { - pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, dn); - of_detach_node(dn); - of_node_put(dn); /* Must decrement the refcount */ + int rc; + + rc = of_detach_node(dn); + if (rc) + return rc; + of_node_put(dn); /* Must decrement the refcount */ return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 64c97d8ac0c5..a38956269fbf 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -23,12 +23,12 @@ #include #include /* for idle_task_exit */ #include +#include #include #include #include #include #include -#include #include #include "plpar_wrappers.h" #include "offline_states.h" @@ -333,10 +333,10 @@ static int pseries_smp_notifier(struct notifier_block *nb, int err = 0; switch (action) { - case PSERIES_RECONFIG_ADD: + case OF_RECONFIG_ATTACH_NODE: err = pseries_add_processor(node); break; - case PSERIES_RECONFIG_REMOVE: + case OF_RECONFIG_DETACH_NODE: pseries_remove_processor(node); break; } @@ -399,7 +399,7 @@ static int __init pseries_cpu_hotplug_init(void) /* Processors can be added/removed only on LPAR */ if (firmware_has_feature(FW_FEATURE_LPAR)) { - pSeries_reconfig_notifier_register(&pseries_smp_nb); + of_reconfig_notifier_register(&pseries_smp_nb); cpu_maps_update_begin(); if (cede_offline_enabled && parse_cede_parameters() == 0) { default_offline_state = CPU_STATE_INACTIVE; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index ecdb0a6b3171..2372c609fa2b 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -16,7 +16,6 @@ #include #include -#include #include static unsigned long get_memblock_size(void) @@ -187,42 +186,69 @@ static int pseries_add_memory(struct device_node *np) return (ret < 0) ? -EINVAL : 0; } -static int pseries_drconf_memory(unsigned long *base, unsigned int action) +static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) { + struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; - int rc; + u32 entries; + u32 *p; + int i, rc = -EINVAL; memblock_size = get_memblock_size(); if (!memblock_size) return -EINVAL; - if (action == PSERIES_DRCONF_MEM_ADD) { - rc = memblock_add(*base, memblock_size); - rc = (rc < 0) ? -EINVAL : 0; - } else if (action == PSERIES_DRCONF_MEM_REMOVE) { - rc = pseries_remove_memblock(*base, memblock_size); - } else { - rc = -EINVAL; + p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + if (!p) + return -EINVAL; + + /* The first int of the property is the number of lmb's described + * by the property. This is followed by an array of of_drconf_cell + * entries. Get the niumber of entries and skip to the array of + * of_drconf_cell's. + */ + entries = *p++; + old_drmem = (struct of_drconf_cell *)p; + + p = (u32 *)pr->prop->value; + p++; + new_drmem = (struct of_drconf_cell *)p; + + for (i = 0; i < entries; i++) { + if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) && + (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) { + rc = pseries_remove_memblock(old_drmem[i].base_addr, + memblock_size); + break; + } else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) && + (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) { + rc = memblock_add(old_drmem[i].base_addr, + memblock_size); + rc = (rc < 0) ? -EINVAL : 0; + break; + } } return rc; } static int pseries_memory_notifier(struct notifier_block *nb, - unsigned long action, void *node) + unsigned long action, void *node) { + struct of_prop_reconfig *pr; int err = 0; switch (action) { - case PSERIES_RECONFIG_ADD: + case OF_RECONFIG_ATTACH_NODE: err = pseries_add_memory(node); break; - case PSERIES_RECONFIG_REMOVE: + case OF_RECONFIG_DETACH_NODE: err = pseries_remove_memory(node); break; - case PSERIES_DRCONF_MEM_ADD: - case PSERIES_DRCONF_MEM_REMOVE: - err = pseries_drconf_memory(node, action); + case OF_RECONFIG_UPDATE_PROPERTY: + pr = (struct of_prop_reconfig *)node; + if (!strcmp(pr->prop->name, "ibm,dynamic-memory")) + err = pseries_update_drconf_memory(pr); break; } return notifier_from_errno(err); @@ -235,7 +261,7 @@ static struct notifier_block pseries_mem_nb = { static int __init pseries_memory_hotplug_init(void) { if (firmware_has_feature(FW_FEATURE_LPAR)) - pSeries_reconfig_notifier_register(&pseries_mem_nb); + of_reconfig_notifier_register(&pseries_mem_nb); return 0; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6153eea27ce7..da5594c441e4 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,13 +36,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -1294,7 +1294,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti struct direct_window *window; switch (action) { - case PSERIES_RECONFIG_REMOVE: + case OF_RECONFIG_DETACH_NODE: if (pci && pci->iommu_table) iommu_free_table(pci->iommu_table, np->full_name); @@ -1357,7 +1357,7 @@ void iommu_init_early_pSeries(void) } - pSeries_reconfig_notifier_register(&iommu_reconfig_nb); + of_reconfig_notifier_register(&iommu_reconfig_nb); register_memory_notifier(&iommu_mem_nb); set_pci_dma_ops(&dma_iommu_ops); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index f99f1ca8035b..720a0cc2e69f 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -16,11 +16,11 @@ #include #include #include +#include #include #include #include -#include #include /** @@ -55,28 +55,6 @@ static struct device_node *derive_parent(const char *path) return parent; } -static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain); - -int pSeries_reconfig_notifier_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register); - -void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister); - -int pSeries_reconfig_notify(unsigned long action, void *p) -{ - int err = blocking_notifier_call_chain(&pSeries_reconfig_chain, - action, p); - - return notifier_to_errno(err); -} - static int pSeries_reconfig_add_node(const char *path, struct property *proplist) { struct device_node *np; @@ -100,13 +78,12 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist goto out_err; } - err = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, np); + err = of_attach_node(np); if (err) { printk(KERN_ERR "Failed to add device node %s\n", path); goto out_err; } - of_attach_node(np); of_node_put(np->parent); return 0; @@ -134,9 +111,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np) return -EBUSY; } - pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, np); of_detach_node(np); - of_node_put(parent); of_node_put(np); /* Must decrement the refcount */ return 0; @@ -381,10 +356,9 @@ static int do_remove_property(char *buf, size_t bufsize) static int do_update_property(char *buf, size_t bufsize) { struct device_node *np; - struct pSeries_reconfig_prop_update upd_value; unsigned char *value; char *name, *end, *next_prop; - int rc, length; + int length; struct property *newprop; buf = parse_node(buf, bufsize, &np); end = buf + bufsize; @@ -406,41 +380,7 @@ static int do_update_property(char *buf, size_t bufsize) if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) slb_set_size(*(int *)value); - upd_value.node = np; - upd_value.property = newprop; - pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value); - - rc = prom_update_property(np, newprop); - if (rc) - return rc; - - /* For memory under the ibm,dynamic-reconfiguration-memory node - * of the device tree, adding and removing memory is just an update - * to the ibm,dynamic-memory property instead of adding/removing a - * memory node in the device tree. For these cases we still need to - * involve the notifier chain. - */ - if (!strcmp(name, "ibm,dynamic-memory")) { - int action; - - next_prop = parse_next_property(next_prop, end, &name, - &length, &value); - if (!next_prop) - return -EINVAL; - - if (!strcmp(name, "add")) - action = PSERIES_DRCONF_MEM_ADD; - else - action = PSERIES_DRCONF_MEM_REMOVE; - - rc = pSeries_reconfig_notify(action, value); - if (rc) { - prom_update_property(np, newprop); - return rc; - } - } - - return 0; + return prom_update_property(np, newprop); } /** diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e3cb7ae61658..e1a5b8a32d25 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,6 @@ #include #include #include -#include #include "plpar_wrappers.h" #include "pseries.h" @@ -258,7 +258,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act int err = NOTIFY_OK; switch (action) { - case PSERIES_RECONFIG_ADD: + case OF_RECONFIG_ATTACH_NODE: pci = np->parent->data; if (pci) { update_dn_pci_info(np, pci->phb); @@ -389,7 +389,7 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); find_and_init_phbs(); - pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); + of_reconfig_notifier_register(&pci_dn_reconfig_nb); pSeries_nvram_init(); diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 0ce625738677..6c4c000671c5 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -28,7 +28,6 @@ #include #include -#include #include #include "nx_csbcpb.h" /* struct nx_csbcpb */ @@ -1014,26 +1013,23 @@ error_out: * NOTIFY_BAD encoded with error number on failure, use * notifier_to_errno() to decode this value */ -static int nx842_OF_notifier(struct notifier_block *np, - unsigned long action, - void *update) +static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, + void *update) { - struct pSeries_reconfig_prop_update *upd; + struct of_prop_reconfig *upd = update; struct nx842_devdata *local_devdata; struct device_node *node = NULL; - upd = (struct pSeries_reconfig_prop_update *)update; - rcu_read_lock(); local_devdata = rcu_dereference(devdata); if (local_devdata) node = local_devdata->dev->of_node; if (local_devdata && - action == PSERIES_UPDATE_PROPERTY && - !strcmp(upd->node->name, node->name)) { + action == OF_RECONFIG_UPDATE_PROPERTY && + !strcmp(upd->dn->name, node->name)) { rcu_read_unlock(); - nx842_OF_upd(upd->property); + nx842_OF_upd(upd->prop); } else rcu_read_unlock(); @@ -1182,7 +1178,7 @@ static int __init nx842_probe(struct vio_dev *viodev, synchronize_rcu(); kfree(old_devdata); - pSeries_reconfig_notifier_register(&nx842_of_nb); + of_reconfig_notifier_register(&nx842_of_nb); ret = nx842_OF_upd(NULL); if (ret && ret != -ENODEV) { @@ -1228,7 +1224,7 @@ static int __exit nx842_remove(struct vio_dev *viodev) spin_lock_irqsave(&devdata_mutex, flags); old_devdata = rcu_dereference_check(devdata, lockdep_is_held(&devdata_mutex)); - pSeries_reconfig_notifier_unregister(&nx842_of_nb); + of_reconfig_notifier_unregister(&nx842_of_nb); rcu_assign_pointer(devdata, NULL); spin_unlock_irqrestore(&devdata_mutex, flags); synchronize_rcu(); diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 638110efae9b..f7a8a16aa7d3 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/drivers/of/base.c b/drivers/of/base.c index bbd073f53c9f..87b63850e8dc 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1028,6 +1028,24 @@ int of_parse_phandle_with_args(struct device_node *np, const char *list_name, } EXPORT_SYMBOL(of_parse_phandle_with_args); +#if defined(CONFIG_OF_DYNAMIC) +static int of_property_notify(int action, struct device_node *np, + struct property *prop) +{ + struct of_prop_reconfig pr; + + pr.dn = np; + pr.prop = prop; + return of_reconfig_notify(action, &pr); +} +#else +static int of_property_notify(int action, struct device_node *np, + struct property *prop) +{ + return 0; +} +#endif + /** * prom_add_property - Add a property to a node */ @@ -1035,6 +1053,11 @@ int prom_add_property(struct device_node *np, struct property *prop) { struct property **next; unsigned long flags; + int rc; + + rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); + if (rc) + return rc; prop->next = NULL; write_lock_irqsave(&devtree_lock, flags); @@ -1072,6 +1095,11 @@ int prom_remove_property(struct device_node *np, struct property *prop) struct property **next; unsigned long flags; int found = 0; + int rc; + + rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); + if (rc) + return rc; write_lock_irqsave(&devtree_lock, flags); next = &np->properties; @@ -1114,7 +1142,11 @@ int prom_update_property(struct device_node *np, { struct property **next, *oldprop; unsigned long flags; - int found = 0; + int rc, found = 0; + + rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); + if (rc) + return rc; if (!newprop->name) return -EINVAL; @@ -1160,6 +1192,26 @@ int prom_update_property(struct device_node *np, * device tree nodes. */ +static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain); + +int of_reconfig_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&of_reconfig_chain, nb); +} + +int of_reconfig_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&of_reconfig_chain, nb); +} + +int of_reconfig_notify(unsigned long action, void *p) +{ + int rc; + + rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); + return notifier_to_errno(rc); +} + #ifdef CONFIG_PROC_DEVICETREE static void of_add_proc_dt_entry(struct device_node *dn) { @@ -1179,9 +1231,14 @@ static void of_add_proc_dt_entry(struct device_node *dn) /** * of_attach_node - Plug a device node into the tree and global list. */ -void of_attach_node(struct device_node *np) +int of_attach_node(struct device_node *np) { unsigned long flags; + int rc; + + rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + if (rc) + return rc; write_lock_irqsave(&devtree_lock, flags); np->sibling = np->parent->child; @@ -1191,6 +1248,7 @@ void of_attach_node(struct device_node *np) write_unlock_irqrestore(&devtree_lock, flags); of_add_proc_dt_entry(np); + return 0; } #ifdef CONFIG_PROC_DEVICETREE @@ -1220,23 +1278,28 @@ static void of_remove_proc_dt_entry(struct device_node *dn) * The caller must hold a reference to the node. The memory associated with * the node is not freed until its refcount goes to zero. */ -void of_detach_node(struct device_node *np) +int of_detach_node(struct device_node *np) { struct device_node *parent; unsigned long flags; + int rc = 0; + + rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + if (rc) + return rc; write_lock_irqsave(&devtree_lock, flags); if (of_node_check_flag(np, OF_DETACHED)) { /* someone already detached it */ write_unlock_irqrestore(&devtree_lock, flags); - return; + return rc; } parent = np->parent; if (!parent) { write_unlock_irqrestore(&devtree_lock, flags); - return; + return rc; } if (allnodes == np) @@ -1265,6 +1328,7 @@ void of_detach_node(struct device_node *np) write_unlock_irqrestore(&devtree_lock, flags); of_remove_proc_dt_entry(np); + return rc; } #endif /* defined(CONFIG_OF_DYNAMIC) */ diff --git a/include/linux/of.h b/include/linux/of.h index 72843b72a2b2..fb5d87b66e3e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -272,11 +273,24 @@ extern int prom_remove_property(struct device_node *np, struct property *prop); extern int prom_update_property(struct device_node *np, struct property *newprop); -#if defined(CONFIG_OF_DYNAMIC) /* For updating the device tree at runtime */ -extern void of_attach_node(struct device_node *); -extern void of_detach_node(struct device_node *); -#endif +#define OF_RECONFIG_ATTACH_NODE 0x0001 +#define OF_RECONFIG_DETACH_NODE 0x0002 +#define OF_RECONFIG_ADD_PROPERTY 0x0003 +#define OF_RECONFIG_REMOVE_PROPERTY 0x0004 +#define OF_RECONFIG_UPDATE_PROPERTY 0x0005 + +struct of_prop_reconfig { + struct device_node *dn; + struct property *prop; +}; + +extern int of_reconfig_notifier_register(struct notifier_block *); +extern int of_reconfig_notifier_unregister(struct notifier_block *); +extern int of_reconfig_notify(unsigned long, void *); + +extern int of_attach_node(struct device_node *); +extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) -- cgit v1.2.3 From 79d1c712958f94372482ad74578b00f44e744c12 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 2 Oct 2012 16:58:46 +0000 Subject: powerpc+of: Rename the drivers/of prom_* functions to of_* Rename the prom_*_property routines of the generic OF code to of_*_property. This brings them in line with the naming used by the rest of the OF code. Signed-off-by: Nathan Fontenot Acked-by: Geoff Levand Acked-by: Rob Herring Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/arm/mach-mxs/mach-mxs.c | 2 +- arch/powerpc/kernel/machine_kexec.c | 14 +++++++------- arch/powerpc/kernel/machine_kexec_64.c | 8 ++++---- arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/platforms/85xx/p1022_ds.c | 6 +++--- arch/powerpc/platforms/ps3/os-area.c | 6 +++--- arch/powerpc/platforms/pseries/iommu.c | 4 ++-- arch/powerpc/platforms/pseries/mobility.c | 4 ++-- arch/powerpc/platforms/pseries/reconfig.c | 6 +++--- drivers/macintosh/smu.c | 2 +- drivers/of/base.c | 15 +++++++-------- include/linux/of.h | 7 +++---- 12 files changed, 37 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 4748ec551a68..d61b915ce52c 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -211,7 +211,7 @@ static void __init update_fec_mac_prop(enum mac_oui oui) macaddr[4] = (val >> 8) & 0xff; macaddr[5] = (val >> 0) & 0xff; - prom_update_property(np, newmac); + of_update_property(np, newmac); } } diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index fa9f6c72f557..e1ec57e87b3b 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -218,23 +218,23 @@ static void __init export_crashk_values(struct device_node *node) * be sure what's in them, so remove them. */ prop = of_find_property(node, "linux,crashkernel-base", NULL); if (prop) - prom_remove_property(node, prop); + of_remove_property(node, prop); prop = of_find_property(node, "linux,crashkernel-size", NULL); if (prop) - prom_remove_property(node, prop); + of_remove_property(node, prop); if (crashk_res.start != 0) { - prom_add_property(node, &crashk_base_prop); + of_add_property(node, &crashk_base_prop); crashk_size = resource_size(&crashk_res); - prom_add_property(node, &crashk_size_prop); + of_add_property(node, &crashk_size_prop); } /* * memory_limit is required by the kexec-tools to limit the * crash regions to the actual memory used. */ - prom_update_property(node, &memory_limit_prop); + of_update_property(node, &memory_limit_prop); } static int __init kexec_setup(void) @@ -249,11 +249,11 @@ static int __init kexec_setup(void) /* remove any stale properties so ours can be found */ prop = of_find_property(node, kernel_end_prop.name, NULL); if (prop) - prom_remove_property(node, prop); + of_remove_property(node, prop); /* information needed by userspace when using default_machine_kexec */ kernel_end = __pa(_end); - prom_add_property(node, &kernel_end_prop); + of_add_property(node, &kernel_end_prop); export_crashk_values(node); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index d7f609086a99..7206701b1ff1 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -389,14 +389,14 @@ static int __init export_htab_values(void) /* remove any stale propertys so ours can be found */ prop = of_find_property(node, htab_base_prop.name, NULL); if (prop) - prom_remove_property(node, prop); + of_remove_property(node, prop); prop = of_find_property(node, htab_size_prop.name, NULL); if (prop) - prom_remove_property(node, prop); + of_remove_property(node, prop); htab_base = __pa(htab_address); - prom_add_property(node, &htab_base_prop); - prom_add_property(node, &htab_size_prop); + of_add_property(node, &htab_base_prop); + of_add_property(node, &htab_size_prop); of_node_put(node); return 0; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 4b06ec5a502e..64f526a321f5 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -208,7 +208,7 @@ pci_create_OF_bus_map(void) of_prop->name = "pci-OF-bus-map"; of_prop->length = 256; of_prop->value = &of_prop[1]; - prom_add_property(dn, of_prop); + of_add_property(dn, of_prop); of_node_put(dn); } } diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 848a3e98e1c1..8fb12570b2f5 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -539,7 +539,7 @@ static void __init p1022_ds_setup_arch(void) }; /* - * prom_update_property() is called before + * of_update_property() is called before * kmalloc() is available, so the 'new' object * should be allocated in the global area. * The easiest way is to do that is to @@ -548,7 +548,7 @@ static void __init p1022_ds_setup_arch(void) */ pr_info("p1022ds: disabling %s node", np2->full_name); - prom_update_property(np2, &nor_status); + of_update_property(np2, &nor_status); of_node_put(np2); } @@ -564,7 +564,7 @@ static void __init p1022_ds_setup_arch(void) pr_info("p1022ds: disabling %s node", np2->full_name); - prom_update_property(np2, &nand_status); + of_update_property(np2, &nand_status); of_node_put(np2); } diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 56d26bc4fd41..09787139834d 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -280,13 +280,13 @@ static void os_area_set_property(struct device_node *node, if (tmp) { pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name); - prom_remove_property(node, tmp); + of_remove_property(node, tmp); } - result = prom_add_property(node, prop); + result = of_add_property(node, prop); if (result) - pr_debug("%s:%d prom_set_property failed\n", __func__, + pr_debug("%s:%d of_set_property failed\n", __func__, __LINE__); } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index da5594c441e4..e2685badb5db 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -760,7 +760,7 @@ static void remove_ddw(struct device_node *np) __remove_ddw(np, ddw_avail, liobn); delprop: - ret = prom_remove_property(np, win64); + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -1070,7 +1070,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_free_window; } - ret = prom_add_property(pdn, win64); + ret = of_add_property(pdn, win64); if (ret) { dev_err(&dev->dev, "unable to add dma window property for %s: %d", pdn->full_name, ret); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index dd30b12edfe4..6573808cc5f3 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -116,7 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { - prom_update_property(dn, new_prop); + of_update_property(dn, new_prop); new_prop = NULL; } @@ -172,7 +172,7 @@ static int update_dt_node(u32 phandle) case 0x80000000: prop = of_find_property(dn, prop_name, NULL); - prom_remove_property(dn, prop); + of_remove_property(dn, prop); prop = NULL; break; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 720a0cc2e69f..30b358dc2beb 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -326,7 +326,7 @@ static int do_add_property(char *buf, size_t bufsize) if (!prop) return -ENOMEM; - prom_add_property(np, prop); + of_add_property(np, prop); return 0; } @@ -350,7 +350,7 @@ static int do_remove_property(char *buf, size_t bufsize) prop = of_find_property(np, buf, NULL); - return prom_remove_property(np, prop); + return of_remove_property(np, prop); } static int do_update_property(char *buf, size_t bufsize) @@ -380,7 +380,7 @@ static int do_update_property(char *buf, size_t bufsize) if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) slb_set_size(*(int *)value); - return prom_update_property(np, newprop); + return of_update_property(np, newprop); } /** diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 7d5a6b40b31c..5b939509db3b 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -997,7 +997,7 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id) "%02x !\n", id, hdr->id); goto failure; } - if (prom_add_property(smu->of_node, prop)) { + if (of_add_property(smu->of_node, prop)) { printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x " "property !\n", id); goto failure; diff --git a/drivers/of/base.c b/drivers/of/base.c index 87b63850e8dc..02d94c4ea83c 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1047,9 +1047,9 @@ static int of_property_notify(int action, struct device_node *np, #endif /** - * prom_add_property - Add a property to a node + * of_add_property - Add a property to a node */ -int prom_add_property(struct device_node *np, struct property *prop) +int of_add_property(struct device_node *np, struct property *prop) { struct property **next; unsigned long flags; @@ -1083,14 +1083,14 @@ int prom_add_property(struct device_node *np, struct property *prop) } /** - * prom_remove_property - Remove a property from a node. + * of_remove_property - Remove a property from a node. * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. * Instead we just move the property to the "dead properties" * list, so it won't be found any more. */ -int prom_remove_property(struct device_node *np, struct property *prop) +int of_remove_property(struct device_node *np, struct property *prop) { struct property **next; unsigned long flags; @@ -1129,7 +1129,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) } /* - * prom_update_property - Update a property in a node, if the property does + * of_update_property - Update a property in a node, if the property does * not exist, add it. * * Note that we don't actually remove it, since we have given out @@ -1137,8 +1137,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) * Instead we just move the property to the "dead properties" list, * and add the new property to the property list */ -int prom_update_property(struct device_node *np, - struct property *newprop) +int of_update_property(struct device_node *np, struct property *newprop) { struct property **next, *oldprop; unsigned long flags; @@ -1153,7 +1152,7 @@ int prom_update_property(struct device_node *np, oldprop = of_find_property(np, newprop->name, NULL); if (!oldprop) - return prom_add_property(np, newprop); + return of_add_property(np, newprop); write_lock_irqsave(&devtree_lock, flags); next = &np->properties; diff --git a/include/linux/of.h b/include/linux/of.h index fb5d87b66e3e..a093b2fe5dfb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -268,10 +268,9 @@ extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_machine_is_compatible(const char *compat); -extern int prom_add_property(struct device_node* np, struct property* prop); -extern int prom_remove_property(struct device_node *np, struct property *prop); -extern int prom_update_property(struct device_node *np, - struct property *newprop); +extern int of_add_property(struct device_node *np, struct property *prop); +extern int of_remove_property(struct device_node *np, struct property *prop); +extern int of_update_property(struct device_node *np, struct property *newprop); /* For updating the device tree at runtime */ #define OF_RECONFIG_ATTACH_NODE 0x0001 -- cgit v1.2.3 From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 14 Nov 2012 10:48:05 -0500 Subject: svcrpc: Revert "sunrpc/cache.h: replace simple_strtoul" Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace simple_strtoul" introduced new range-checking which could cause get_int to fail on unsigned integers too large to be represented as an int. We could parse them as unsigned instead--but it turns out svcgssd is actually passing down "-1" in some cases. Which is perhaps stupid, but there's nothing we can do about it now. So just revert back to the previous "sloppy" behavior that accepts either representation. Cc: stable@vger.kernel.org Reported-by: Sven Geggus Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f792794f6634..5dc9ee4d616e 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize); static inline int get_int(char **bpp, int *anint) { char buf[50]; + char *ep; + int rv; int len = qword_get(bpp, buf, sizeof(buf)); if (len < 0) @@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint) if (len == 0) return -ENOENT; - if (kstrtoint(buf, 0, anint)) + rv = simple_strtol(buf, &ep, 0); + if (*ep) return -EINVAL; + *anint = rv; return 0; } -- cgit v1.2.3 From fc05d5a30dc19dd4c6d161e551719a8c597c7890 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 4 Oct 2012 09:28:49 +0200 Subject: mtd: delete nomadik_nand driver The nomadik_nand driver is really just a subset of the FSMC NAND driver, and there are no users anymore so let's delete it. Signed-off-by: Linus Walleij Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Artem Bityutskiy --- drivers/mtd/nand/Kconfig | 6 - drivers/mtd/nand/Makefile | 1 - drivers/mtd/nand/nomadik_nand.c | 235 ------------------------- include/linux/platform_data/mtd-nomadik-nand.h | 16 -- 4 files changed, 258 deletions(-) delete mode 100644 drivers/mtd/nand/nomadik_nand.c delete mode 100644 include/linux/platform_data/mtd-nomadik-nand.h (limited to 'include/linux') diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index ee803d611e4e..a803d9ba55bd 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -526,12 +526,6 @@ config MTD_NAND_MXC This enables the driver for the NAND flash controller on the MXC processors. -config MTD_NAND_NOMADIK - tristate "ST Nomadik 8815 NAND support" - depends on ARCH_NOMADIK - help - Driver for the NAND flash controller on the Nomadik, with ECC. - config MTD_NAND_SH_FLCTL tristate "Support for NAND on Renesas SuperH FLCTL" depends on SUPERH || ARCH_SHMOBILE diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 38358c90771e..44fca0553365 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -49,7 +49,6 @@ obj-$(CONFIG_MTD_NAND_MXC) += mxc_nand.o obj-$(CONFIG_MTD_NAND_SOCRATES) += socrates_nand.o obj-$(CONFIG_MTD_NAND_TXX9NDFMC) += txx9ndfmc.o obj-$(CONFIG_MTD_NAND_NUC900) += nuc900_nand.o -obj-$(CONFIG_MTD_NAND_NOMADIK) += nomadik_nand.o obj-$(CONFIG_MTD_NAND_MPC5121_NFC) += mpc5121_nfc.o obj-$(CONFIG_MTD_NAND_RICOH) += r852.o obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c deleted file mode 100644 index 9ee0c4edfacf..000000000000 --- a/drivers/mtd/nand/nomadik_nand.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * drivers/mtd/nand/nomadik_nand.c - * - * Overview: - * Driver for on-board NAND flash on Nomadik Platforms - * - * Copyright © 2007 STMicroelectronics Pvt. Ltd. - * Author: Sachin Verma - * - * Copyright © 2009 Alessandro Rubini - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -struct nomadik_nand_host { - struct mtd_info mtd; - struct nand_chip nand; - void __iomem *data_va; - void __iomem *cmd_va; - void __iomem *addr_va; - struct nand_bbt_descr *bbt_desc; -}; - -static struct nand_ecclayout nomadik_ecc_layout = { - .eccbytes = 3 * 4, - .eccpos = { /* each subpage has 16 bytes: pos 2,3,4 hosts ECC */ - 0x02, 0x03, 0x04, - 0x12, 0x13, 0x14, - 0x22, 0x23, 0x24, - 0x32, 0x33, 0x34}, - /* let's keep bytes 5,6,7 for us, just in case we change ECC algo */ - .oobfree = { {0x08, 0x08}, {0x18, 0x08}, {0x28, 0x08}, {0x38, 0x08} }, -}; - -static void nomadik_ecc_control(struct mtd_info *mtd, int mode) -{ - /* No need to enable hw ecc, it's on by default */ -} - -static void nomadik_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) -{ - struct nand_chip *nand = mtd->priv; - struct nomadik_nand_host *host = nand->priv; - - if (cmd == NAND_CMD_NONE) - return; - - if (ctrl & NAND_CLE) - writeb(cmd, host->cmd_va); - else - writeb(cmd, host->addr_va); -} - -static int nomadik_nand_probe(struct platform_device *pdev) -{ - struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data; - struct nomadik_nand_host *host; - struct mtd_info *mtd; - struct nand_chip *nand; - struct resource *res; - int ret = 0; - - /* Allocate memory for the device structure (and zero it) */ - host = kzalloc(sizeof(struct nomadik_nand_host), GFP_KERNEL); - if (!host) { - dev_err(&pdev->dev, "Failed to allocate device structure.\n"); - return -ENOMEM; - } - - /* Call the client's init function, if any */ - if (pdata->init) - ret = pdata->init(); - if (ret < 0) { - dev_err(&pdev->dev, "Init function failed\n"); - goto err; - } - - /* ioremap three regions */ - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr"); - if (!res) { - ret = -EIO; - goto err_unmap; - } - host->addr_va = ioremap(res->start, resource_size(res)); - - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data"); - if (!res) { - ret = -EIO; - goto err_unmap; - } - host->data_va = ioremap(res->start, resource_size(res)); - - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd"); - if (!res) { - ret = -EIO; - goto err_unmap; - } - host->cmd_va = ioremap(res->start, resource_size(res)); - - if (!host->addr_va || !host->data_va || !host->cmd_va) { - ret = -ENOMEM; - goto err_unmap; - } - - /* Link all private pointers */ - mtd = &host->mtd; - nand = &host->nand; - mtd->priv = nand; - nand->priv = host; - - host->mtd.owner = THIS_MODULE; - nand->IO_ADDR_R = host->data_va; - nand->IO_ADDR_W = host->data_va; - nand->cmd_ctrl = nomadik_cmd_ctrl; - - /* - * This stanza declares ECC_HW but uses soft routines. It's because - * HW claims to make the calculation but not the correction. However, - * I haven't managed to get the desired data out of it until now. - */ - nand->ecc.mode = NAND_ECC_SOFT; - nand->ecc.layout = &nomadik_ecc_layout; - nand->ecc.hwctl = nomadik_ecc_control; - nand->ecc.size = 512; - nand->ecc.bytes = 3; - - nand->options = pdata->options; - - /* - * Scan to find existence of the device - */ - if (nand_scan(&host->mtd, 1)) { - ret = -ENXIO; - goto err_unmap; - } - - mtd_device_register(&host->mtd, pdata->parts, pdata->nparts); - - platform_set_drvdata(pdev, host); - return 0; - - err_unmap: - if (host->cmd_va) - iounmap(host->cmd_va); - if (host->data_va) - iounmap(host->data_va); - if (host->addr_va) - iounmap(host->addr_va); - err: - kfree(host); - return ret; -} - -/* - * Clean up routine - */ -static int nomadik_nand_remove(struct platform_device *pdev) -{ - struct nomadik_nand_host *host = platform_get_drvdata(pdev); - struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data; - - if (pdata->exit) - pdata->exit(); - - if (host) { - nand_release(&host->mtd); - iounmap(host->cmd_va); - iounmap(host->data_va); - iounmap(host->addr_va); - kfree(host); - } - return 0; -} - -static int nomadik_nand_suspend(struct device *dev) -{ - struct nomadik_nand_host *host = dev_get_drvdata(dev); - int ret = 0; - if (host) - ret = mtd_suspend(&host->mtd); - return ret; -} - -static int nomadik_nand_resume(struct device *dev) -{ - struct nomadik_nand_host *host = dev_get_drvdata(dev); - if (host) - mtd_resume(&host->mtd); - return 0; -} - -static const struct dev_pm_ops nomadik_nand_pm_ops = { - .suspend = nomadik_nand_suspend, - .resume = nomadik_nand_resume, -}; - -static struct platform_driver nomadik_nand_driver = { - .probe = nomadik_nand_probe, - .remove = nomadik_nand_remove, - .driver = { - .owner = THIS_MODULE, - .name = "nomadik_nand", - .pm = &nomadik_nand_pm_ops, - }, -}; - -module_platform_driver(nomadik_nand_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("ST Microelectronics (sachin.verma@st.com)"); -MODULE_DESCRIPTION("NAND driver for Nomadik Platform"); diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h deleted file mode 100644 index c3c8254c22a5..000000000000 --- a/include/linux/platform_data/mtd-nomadik-nand.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_ARCH_NAND_H -#define __ASM_ARCH_NAND_H - -struct nomadik_nand_platform_data { - struct mtd_partition *parts; - int nparts; - int options; - int (*init) (void); - int (*exit) (void); -}; - -#define NAND_IO_DATA 0x40000000 -#define NAND_IO_CMD 0x40800000 -#define NAND_IO_ADDR 0x41000000 - -#endif /* __ASM_ARCH_NAND_H */ -- cgit v1.2.3 From 6d7b42a447f92eb3e7e410bbf62042693eb040f7 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Thu, 4 Oct 2012 15:14:16 +0200 Subject: mtd: fsmc_nand: pass the ale and cmd resource via resource Do not use the platform_data to pass resource and be smart in the drivers. Just pass it via resource Switch to devm_request_and_ioremap at the sametime Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Linus Walleij Reviewed-By: Vipin Kumar Signed-off-by: Artem Bityutskiy --- .../devicetree/bindings/mtd/fsmc-nand.txt | 12 +++--- arch/arm/boot/dts/spear13xx.dtsi | 10 ++--- arch/arm/boot/dts/spear300.dtsi | 8 ++-- arch/arm/boot/dts/spear310.dtsi | 8 ++-- arch/arm/boot/dts/spear320.dtsi | 8 ++-- arch/arm/boot/dts/spear600.dtsi | 8 ++-- arch/arm/mach-u300/core.c | 14 ++++++- drivers/mtd/nand/fsmc_nand.c | 44 ++++++---------------- include/linux/mtd/fsmc.h | 3 -- 9 files changed, 49 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt index e2c663b354d2..e3ea32e7de3e 100644 --- a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt +++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt @@ -3,9 +3,7 @@ Required properties: - compatible : "st,spear600-fsmc-nand" - reg : Address range of the mtd chip -- reg-names: Should contain the reg names "fsmc_regs" and "nand_data" -- st,ale-off : Chip specific offset to ALE -- st,cle-off : Chip specific offset to CLE +- reg-names: Should contain the reg names "fsmc_regs", "nand_data", "nand_addr" and "nand_cmd" Optional properties: - bank-width : Width (in bytes) of the device. If not present, the width @@ -19,10 +17,10 @@ Example: #address-cells = <1>; #size-cells = <1>; reg = <0xd1800000 0x1000 /* FSMC Register */ - 0xd2000000 0x4000>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; - st,ale-off = <0x20000>; - st,cle-off = <0x10000>; + 0xd2000000 0x0010 /* NAND Base DATA */ + 0xd2020000 0x0010 /* NAND Base ADDR */ + 0xd2010000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; bank-width = <1>; nand-skip-bbtscan; diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index f7b84aced654..14a6d15c2a81 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -104,15 +104,15 @@ compatible = "st,spear600-fsmc-nand"; #address-cells = <1>; #size-cells = <1>; - reg = <0xb0000000 0x1000 /* FSMC Register */ - 0xb0800000 0x0010>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; + reg = <0xb0000000 0x1000 /* FSMC Register*/ + 0xb0800000 0x0010 /* NAND Base DATA */ + 0xb0820000 0x0010 /* NAND Base ADDR */ + 0xb0810000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; interrupts = <0 20 0x4 0 21 0x4 0 22 0x4 0 23 0x4>; - st,ale-off = <0x20000>; - st,cle-off = <0x10000>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi index ed3627c116cc..bc436387d7f9 100644 --- a/arch/arm/boot/dts/spear300.dtsi +++ b/arch/arm/boot/dts/spear300.dtsi @@ -38,10 +38,10 @@ #address-cells = <1>; #size-cells = <1>; reg = <0x94000000 0x1000 /* FSMC Register */ - 0x80000000 0x0010>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; - st,ale-off = <0x20000>; - st,cle-off = <0x10000>; + 0x80000000 0x0010 /* NAND Base DATA */ + 0x80020000 0x0010 /* NAND Base ADDR */ + 0x80010000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi index 62fc4fb3e5f9..7840e529aba2 100644 --- a/arch/arm/boot/dts/spear310.dtsi +++ b/arch/arm/boot/dts/spear310.dtsi @@ -32,10 +32,10 @@ #address-cells = <1>; #size-cells = <1>; reg = <0x44000000 0x1000 /* FSMC Register */ - 0x40000000 0x0010>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; - st,ale-off = <0x10000>; - st,cle-off = <0x20000>; + 0x40000000 0x0010 /* NAND Base DATA */ + 0x40020000 0x0010 /* NAND Base ADDR */ + 0x40010000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi index 1f49d69595a0..5ad820641ac0 100644 --- a/arch/arm/boot/dts/spear320.dtsi +++ b/arch/arm/boot/dts/spear320.dtsi @@ -38,10 +38,10 @@ #address-cells = <1>; #size-cells = <1>; reg = <0x4c000000 0x1000 /* FSMC Register */ - 0x50000000 0x0010>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; - st,ale-off = <0x20000>; - st,cle-off = <0x10000>; + 0x50000000 0x0010 /* NAND Base DATA */ + 0x50020000 0x0010 /* NAND Base ADDR */ + 0x50010000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi index a3c36e47d7ef..4ecc66f5ac88 100644 --- a/arch/arm/boot/dts/spear600.dtsi +++ b/arch/arm/boot/dts/spear600.dtsi @@ -67,10 +67,10 @@ #address-cells = <1>; #size-cells = <1>; reg = <0xd1800000 0x1000 /* FSMC Register */ - 0xd2000000 0x4000>; /* NAND Base */ - reg-names = "fsmc_regs", "nand_data"; - st,ale-off = <0x20000>; - st,cle-off = <0x10000>; + 0xd2000000 0x0010 /* NAND Base DATA */ + 0xd2020000 0x0010 /* NAND Base ADDR */ + 0xd2010000 0x0010>; /* NAND Base CMD */ + reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd"; status = "disabled"; }; diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index b8efac4daed8..f642a1552346 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -251,6 +251,18 @@ static struct resource rtc_resources[] = { * but these are not yet used by the driver. */ static struct resource fsmc_resources[] = { + { + .name = "nand_addr", + .start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE, + .end = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + { + .name = "nand_cmd", + .start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE, + .end = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, { .name = "nand_data", .start = U300_NAND_CS0_PHYS_BASE, @@ -1496,8 +1508,6 @@ static struct fsmc_nand_platform_data nand_platform_data = { .nr_partitions = ARRAY_SIZE(u300_partitions), .options = NAND_SKIP_BBTSCAN, .width = FSMC_NAND_BW8, - .ale_off = PLAT_NAND_ALE, - .cle_off = PLAT_NAND_CLE, }; static struct platform_device nand_device = { diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c index 38d26240d8b1..cb8645087151 100644 --- a/drivers/mtd/nand/fsmc_nand.c +++ b/drivers/mtd/nand/fsmc_nand.c @@ -876,8 +876,6 @@ static int __devinit fsmc_nand_probe_config_dt(struct platform_device *pdev, return -EINVAL; } } - of_property_read_u32(np, "st,ale-off", &pdata->ale_off); - of_property_read_u32(np, "st,cle-off", &pdata->cle_off); if (of_get_property(np, "nand-skip-bbtscan", NULL)) pdata->options = NAND_SKIP_BBTSCAN; @@ -935,41 +933,28 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) if (!res) return -EINVAL; - if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res), - pdev->name)) { - dev_err(&pdev->dev, "Failed to get memory data resourse\n"); - return -ENOENT; - } - - host->data_pa = (dma_addr_t)res->start; - host->data_va = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); + host->data_va = devm_request_and_ioremap(&pdev->dev, res); if (!host->data_va) { dev_err(&pdev->dev, "data ioremap failed\n"); return -ENOMEM; } + host->data_pa = (dma_addr_t)res->start; - if (!devm_request_mem_region(&pdev->dev, res->start + pdata->ale_off, - resource_size(res), pdev->name)) { - dev_err(&pdev->dev, "Failed to get memory ale resourse\n"); - return -ENOENT; - } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr"); + if (!res) + return -EINVAL; - host->addr_va = devm_ioremap(&pdev->dev, res->start + pdata->ale_off, - resource_size(res)); + host->addr_va = devm_request_and_ioremap(&pdev->dev, res); if (!host->addr_va) { dev_err(&pdev->dev, "ale ioremap failed\n"); return -ENOMEM; } - if (!devm_request_mem_region(&pdev->dev, res->start + pdata->cle_off, - resource_size(res), pdev->name)) { - dev_err(&pdev->dev, "Failed to get memory cle resourse\n"); - return -ENOENT; - } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd"); + if (!res) + return -EINVAL; - host->cmd_va = devm_ioremap(&pdev->dev, res->start + pdata->cle_off, - resource_size(res)); + host->cmd_va = devm_request_and_ioremap(&pdev->dev, res); if (!host->cmd_va) { dev_err(&pdev->dev, "ale ioremap failed\n"); return -ENOMEM; @@ -979,14 +964,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) if (!res) return -EINVAL; - if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res), - pdev->name)) { - dev_err(&pdev->dev, "Failed to get memory regs resourse\n"); - return -ENOENT; - } - - host->regs_va = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); + host->regs_va = devm_request_and_ioremap(&pdev->dev, res); if (!host->regs_va) { dev_err(&pdev->dev, "regs ioremap failed\n"); return -ENOMEM; diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h index b20029221fb1..d6ed61ef451d 100644 --- a/include/linux/mtd/fsmc.h +++ b/include/linux/mtd/fsmc.h @@ -155,9 +155,6 @@ struct fsmc_nand_platform_data { unsigned int width; unsigned int bank; - /* CLE, ALE offsets */ - unsigned int cle_off; - unsigned int ale_off; enum access_mode mode; void (*select_bank)(uint32_t bank, uint32_t busw); -- cgit v1.2.3 From 2f25ae97fe4b424d88d765797c46456c7c0f1bae Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Tue, 9 Oct 2012 16:14:53 +0530 Subject: mtd: nand: Increase the ecc placement locations to 640 Few devices like H27UBG8T2CTR have a writesize/oobsize of 8KB/640B. This means that the maximum oobsize has gone up to 640 bytes and consequently the maximum ecc placement locations have also gone up to 640. Signed-off-by: Vipin Kumar Signed-off-by: Artem Bityutskiy --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 81d61e704599..f9ac2897b86b 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -98,7 +98,7 @@ struct mtd_oob_ops { }; #define MTD_MAX_OOBFREE_ENTRIES_LARGE 32 -#define MTD_MAX_ECCPOS_ENTRIES_LARGE 448 +#define MTD_MAX_ECCPOS_ENTRIES_LARGE 640 /* * Internal ECC layout control structure. For historical reasons, there is a * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained -- cgit v1.2.3 From 5de0b52ea8f8f5149502867acff2efb5efaf1fc2 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Sat, 13 Oct 2012 13:03:29 -0400 Subject: mtd: gpmi: remove unneccessary header The whole gpmi-nand driver has turned to pure devicetree supported. So the linux/mtd/gpmi-nand.h is not neccessary now. Just remove it, and move some macros to the gpmi-nand driver itself. Signed-off-by: Huang Shijie Signed-off-by: Artem Bityutskiy --- drivers/mtd/nand/gpmi-nand/gpmi-lib.c | 1 - drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 7 +++- drivers/mtd/nand/gpmi-nand/gpmi-nand.h | 1 - include/linux/mtd/gpmi-nand.h | 68 ---------------------------------- 4 files changed, 6 insertions(+), 71 deletions(-) delete mode 100644 include/linux/mtd/gpmi-nand.h (limited to 'include/linux') diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 3502accd4bc3..1585c5b1c8bf 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -18,7 +18,6 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include #include #include diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index e2c56fc4574b..d37619882fa6 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,12 @@ #include #include "gpmi-nand.h" +/* Resource names for the GPMI NAND driver. */ +#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "gpmi-nand" +#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "bch" +#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "bch" +#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "gpmi-dma" + /* add our owner bbt descriptor */ static uint8_t scan_ff_pattern[] = { 0xff }; static struct nand_bbt_descr gpmi_bbt_descr = { diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index 7ac25c1e58f9..3d93a5e39090 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -130,7 +130,6 @@ struct gpmi_nand_data { /* System Interface */ struct device *dev; struct platform_device *pdev; - struct gpmi_nand_platform_data *pdata; /* Resources */ struct resources resources; diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h deleted file mode 100644 index ed3c4e09f3d1..000000000000 --- a/include/linux/mtd/gpmi-nand.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef __MACH_MXS_GPMI_NAND_H__ -#define __MACH_MXS_GPMI_NAND_H__ - -/* The size of the resources is fixed. */ -#define GPMI_NAND_RES_SIZE 6 - -/* Resource names for the GPMI NAND driver. */ -#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME "gpmi-nand" -#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME "GPMI NAND GPMI Interrupt" -#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME "bch" -#define GPMI_NAND_BCH_INTERRUPT_RES_NAME "bch" -#define GPMI_NAND_DMA_CHANNELS_RES_NAME "GPMI NAND DMA Channels" -#define GPMI_NAND_DMA_INTERRUPT_RES_NAME "gpmi-dma" - -/** - * struct gpmi_nand_platform_data - GPMI NAND driver platform data. - * - * This structure communicates platform-specific information to the GPMI NAND - * driver that can't be expressed as resources. - * - * @platform_init: A pointer to a function the driver will call to - * initialize the platform (e.g., set up the pin mux). - * @min_prop_delay_in_ns: Minimum propagation delay of GPMI signals to and - * from the NAND Flash device, in nanoseconds. - * @max_prop_delay_in_ns: Maximum propagation delay of GPMI signals to and - * from the NAND Flash device, in nanoseconds. - * @max_chip_count: The maximum number of chips for which the driver - * should configure the hardware. This value most - * likely reflects the number of pins that are - * connected to a NAND Flash device. If this is - * greater than the SoC hardware can support, the - * driver will print a message and fail to initialize. - * @partitions: An optional pointer to an array of partition - * descriptions. - * @partition_count: The number of elements in the partitions array. - */ -struct gpmi_nand_platform_data { - /* SoC hardware information. */ - int (*platform_init)(void); - - /* NAND Flash information. */ - unsigned int min_prop_delay_in_ns; - unsigned int max_prop_delay_in_ns; - unsigned int max_chip_count; - - /* Medium information. */ - struct mtd_partition *partitions; - unsigned partition_count; -}; -#endif -- cgit v1.2.3 From e8a9d8f31c592eea89f1b0d3fd425e7a96944e88 Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 19 Oct 2012 12:15:34 +0200 Subject: mtd: sh_flctl: Minor cleanups Some small fixes to avoid sparse and smatch complain. Other cosmetic fixes as well. - Change of the type of the member index in struct sh_flctl from signed to unsigned. We use index by addressing array members, so unsigned is more concise here. Adapt functions relying on sh_flctl::index. - Remove a blurring cast in write_fiforeg(). - Apply consistent naming scheme when refering to the data buffer. - Shorten some unnecessarily verbose functions. - Remove spaces at start of lines. Signed-off-by: Bastian Hecht Signed-off-by: Artem Bityutskiy --- drivers/mtd/nand/sh_flctl.c | 37 ++++++++++++++++--------------------- include/linux/mtd/sh_flctl.h | 2 +- 2 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 4fbfe96e37a1..78d18c0f132f 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -225,7 +225,7 @@ static enum flctl_ecc_res_t wait_recfifo_ready for (i = 0; i < 3; i++) { uint8_t org; - int index; + unsigned int index; data = readl(ecc_reg[i]); @@ -305,28 +305,29 @@ static enum flctl_ecc_res_t read_ecfiforeg return res; } -static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset) +static void write_fiforeg(struct sh_flctl *flctl, int rlen, + unsigned int offset) { int i, len_4align; - unsigned long *data = (unsigned long *)&flctl->done_buff[offset]; - void *fifo_addr = (void *)FLDTFIFO(flctl); + unsigned long *buf = (unsigned long *)&flctl->done_buff[offset]; len_4align = (rlen + 3) / 4; for (i = 0; i < len_4align; i++) { wait_wfifo_ready(flctl); - writel(cpu_to_be32(data[i]), fifo_addr); + writel(cpu_to_be32(buf[i]), FLDTFIFO(flctl)); } } -static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, int offset) +static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, + unsigned int offset) { int i, len_4align; - unsigned long *data = (unsigned long *)&flctl->done_buff[offset]; + unsigned long *buf = (unsigned long *)&flctl->done_buff[offset]; len_4align = (rlen + 3) / 4; for (i = 0; i < len_4align; i++) { wait_wecfifo_ready(flctl); - writel(cpu_to_be32(data[i]), FLECFIFO(flctl)); + writel(cpu_to_be32(buf[i]), FLECFIFO(flctl)); } } @@ -748,41 +749,35 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr) static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len) { struct sh_flctl *flctl = mtd_to_flctl(mtd); - int index = flctl->index; - memcpy(&flctl->done_buff[index], buf, len); + memcpy(&flctl->done_buff[flctl->index], buf, len); flctl->index += len; } static uint8_t flctl_read_byte(struct mtd_info *mtd) { struct sh_flctl *flctl = mtd_to_flctl(mtd); - int index = flctl->index; uint8_t data; - data = flctl->done_buff[index]; + data = flctl->done_buff[flctl->index]; flctl->index++; return data; } static uint16_t flctl_read_word(struct mtd_info *mtd) { - struct sh_flctl *flctl = mtd_to_flctl(mtd); - int index = flctl->index; - uint16_t data; - uint16_t *buf = (uint16_t *)&flctl->done_buff[index]; + struct sh_flctl *flctl = mtd_to_flctl(mtd); + uint16_t *buf = (uint16_t *)&flctl->done_buff[flctl->index]; - data = *buf; - flctl->index += 2; - return data; + flctl->index += 2; + return *buf; } static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len) { struct sh_flctl *flctl = mtd_to_flctl(mtd); - int index = flctl->index; - memcpy(buf, &flctl->done_buff[index], len); + memcpy(buf, &flctl->done_buff[flctl->index], len); flctl->index += len; } diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 01e4b15b280e..481557688d05 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -147,7 +147,7 @@ struct sh_flctl { uint8_t done_buff[2048 + 64]; /* max size 2048 + 64 */ int read_bytes; - int index; + unsigned int index; int seqin_column; /* column in SEQIN cmd */ int seqin_page_addr; /* page_addr in SEQIN cmd */ uint32_t seqin_read_cmd; /* read cmd in SEQIN cmd */ -- cgit v1.2.3 From 83738d87e3a0a4096e1419a65b8228130d183df6 Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 19 Oct 2012 12:15:35 +0200 Subject: mtd: sh_flctl: Add DMA capabilty The code probes if DMA channels can get allocated and tears them down at removal/failure if needed. If available it uses them to transfer the data part (not ECC). On failure we fall back to PIO mode. Based on Guennadi Liakhovetski's code from the sh_mmcif driver. Signed-off-by: Bastian Hecht Reviewed-by: Guennadi Liakhovetski Signed-off-by: Artem Bityutskiy --- drivers/mtd/nand/sh_flctl.c | 173 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mtd/sh_flctl.h | 12 +++ 2 files changed, 183 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 78d18c0f132f..6dc0369aa44b 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -23,11 +23,15 @@ #include #include +#include #include +#include +#include #include #include #include #include +#include #include #include @@ -106,6 +110,84 @@ static void wait_completion(struct sh_flctl *flctl) writeb(0x0, FLTRCR(flctl)); } +static void flctl_dma_complete(void *param) +{ + struct sh_flctl *flctl = param; + + complete(&flctl->dma_complete); +} + +static void flctl_release_dma(struct sh_flctl *flctl) +{ + if (flctl->chan_fifo0_rx) { + dma_release_channel(flctl->chan_fifo0_rx); + flctl->chan_fifo0_rx = NULL; + } + if (flctl->chan_fifo0_tx) { + dma_release_channel(flctl->chan_fifo0_tx); + flctl->chan_fifo0_tx = NULL; + } +} + +static void flctl_setup_dma(struct sh_flctl *flctl) +{ + dma_cap_mask_t mask; + struct dma_slave_config cfg; + struct platform_device *pdev = flctl->pdev; + struct sh_flctl_platform_data *pdata = pdev->dev.platform_data; + int ret; + + if (!pdata) + return; + + if (pdata->slave_id_fifo0_tx <= 0 || pdata->slave_id_fifo0_rx <= 0) + return; + + /* We can only either use DMA for both Tx and Rx or not use it at all */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + flctl->chan_fifo0_tx = dma_request_channel(mask, shdma_chan_filter, + (void *)pdata->slave_id_fifo0_tx); + dev_dbg(&pdev->dev, "%s: TX: got channel %p\n", __func__, + flctl->chan_fifo0_tx); + + if (!flctl->chan_fifo0_tx) + return; + + memset(&cfg, 0, sizeof(cfg)); + cfg.slave_id = pdata->slave_id_fifo0_tx; + cfg.direction = DMA_MEM_TO_DEV; + cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl); + cfg.src_addr = 0; + ret = dmaengine_slave_config(flctl->chan_fifo0_tx, &cfg); + if (ret < 0) + goto err; + + flctl->chan_fifo0_rx = dma_request_channel(mask, shdma_chan_filter, + (void *)pdata->slave_id_fifo0_rx); + dev_dbg(&pdev->dev, "%s: RX: got channel %p\n", __func__, + flctl->chan_fifo0_rx); + + if (!flctl->chan_fifo0_rx) + goto err; + + cfg.slave_id = pdata->slave_id_fifo0_rx; + cfg.direction = DMA_DEV_TO_MEM; + cfg.dst_addr = 0; + cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl); + ret = dmaengine_slave_config(flctl->chan_fifo0_rx, &cfg); + if (ret < 0) + goto err; + + init_completion(&flctl->dma_complete); + + return; + +err: + flctl_release_dma(flctl); +} + static void set_addr(struct mtd_info *mtd, int column, int page_addr) { struct sh_flctl *flctl = mtd_to_flctl(mtd); @@ -261,6 +343,70 @@ static void wait_wecfifo_ready(struct sh_flctl *flctl) timeout_error(flctl, __func__); } +static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, + int len, enum dma_data_direction dir) +{ + struct dma_async_tx_descriptor *desc = NULL; + struct dma_chan *chan; + enum dma_transfer_direction tr_dir; + dma_addr_t dma_addr; + dma_cookie_t cookie = -EINVAL; + uint32_t reg; + int ret; + + if (dir == DMA_FROM_DEVICE) { + chan = flctl->chan_fifo0_rx; + tr_dir = DMA_DEV_TO_MEM; + } else { + chan = flctl->chan_fifo0_tx; + tr_dir = DMA_MEM_TO_DEV; + } + + dma_addr = dma_map_single(chan->device->dev, buf, len, dir); + + if (dma_addr) + desc = dmaengine_prep_slave_single(chan, dma_addr, len, + tr_dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + if (desc) { + reg = readl(FLINTDMACR(flctl)); + reg |= DREQ0EN; + writel(reg, FLINTDMACR(flctl)); + + desc->callback = flctl_dma_complete; + desc->callback_param = flctl; + cookie = dmaengine_submit(desc); + + dma_async_issue_pending(chan); + } else { + /* DMA failed, fall back to PIO */ + flctl_release_dma(flctl); + dev_warn(&flctl->pdev->dev, + "DMA failed, falling back to PIO\n"); + ret = -EIO; + goto out; + } + + ret = + wait_for_completion_timeout(&flctl->dma_complete, + msecs_to_jiffies(3000)); + + if (ret <= 0) { + chan->device->device_control(chan, DMA_TERMINATE_ALL, 0); + dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n"); + } + +out: + reg = readl(FLINTDMACR(flctl)); + reg &= ~DREQ0EN; + writel(reg, FLINTDMACR(flctl)); + + dma_unmap_single(chan->device->dev, dma_addr, len, dir); + + /* ret > 0 is success */ + return ret; +} + static void read_datareg(struct sh_flctl *flctl, int offset) { unsigned long data; @@ -279,11 +425,20 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) len_4align = (rlen + 3) / 4; + /* initiate DMA transfer */ + if (flctl->chan_fifo0_rx && rlen >= 32 && + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0) + goto convert; /* DMA success */ + + /* do polling transfer */ for (i = 0; i < len_4align; i++) { wait_rfifo_ready(flctl); buf[i] = readl(FLDTFIFO(flctl)); - buf[i] = be32_to_cpu(buf[i]); } + +convert: + for (i = 0; i < len_4align; i++) + buf[i] = be32_to_cpu(buf[i]); } static enum flctl_ecc_res_t read_ecfiforeg @@ -325,9 +480,19 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, unsigned long *buf = (unsigned long *)&flctl->done_buff[offset]; len_4align = (rlen + 3) / 4; + + for (i = 0; i < len_4align; i++) + buf[i] = cpu_to_be32(buf[i]); + + /* initiate DMA transfer */ + if (flctl->chan_fifo0_tx && rlen >= 32 && + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0) + return; /* DMA success */ + + /* do polling transfer */ for (i = 0; i < len_4align; i++) { wait_wecfifo_ready(flctl); - writel(cpu_to_be32(buf[i]), FLECFIFO(flctl)); + writel(buf[i], FLECFIFO(flctl)); } } @@ -925,6 +1090,8 @@ static int __devinit flctl_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); pm_runtime_resume(&pdev->dev); + flctl_setup_dma(flctl); + ret = nand_scan_ident(flctl_mtd, 1, NULL); if (ret) goto err_chip; @@ -942,6 +1109,7 @@ static int __devinit flctl_probe(struct platform_device *pdev) return 0; err_chip: + flctl_release_dma(flctl); pm_runtime_disable(&pdev->dev); free_irq(irq, flctl); err_flste: @@ -955,6 +1123,7 @@ static int __devexit flctl_remove(struct platform_device *pdev) { struct sh_flctl *flctl = platform_get_drvdata(pdev); + flctl_release_dma(flctl); nand_release(&flctl->mtd); pm_runtime_disable(&pdev->dev); free_irq(platform_get_irq(pdev, 0), flctl); diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 481557688d05..1c28f8879b1c 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -20,6 +20,7 @@ #ifndef __SH_FLCTL_H__ #define __SH_FLCTL_H__ +#include #include #include #include @@ -107,6 +108,7 @@ #define ESTERINTE (0x1 << 24) /* ECC error interrupt enable */ #define AC1CLR (0x1 << 19) /* ECC FIFO clear */ #define AC0CLR (0x1 << 18) /* Data FIFO clear */ +#define DREQ0EN (0x1 << 16) /* FLDTFIFODMA Request Enable */ #define ECERB (0x1 << 9) /* ECC error */ #define STERB (0x1 << 8) /* Status error */ #define STERINTE (0x1 << 4) /* Status error enable */ @@ -138,6 +140,8 @@ enum flctl_ecc_res_t { FL_TIMEOUT }; +struct dma_chan; + struct sh_flctl { struct mtd_info mtd; struct nand_chip chip; @@ -161,6 +165,11 @@ struct sh_flctl { unsigned hwecc:1; /* Hardware ECC (0 = disabled, 1 = enabled) */ unsigned holden:1; /* Hardware has FLHOLDCR and HOLDEN is set */ unsigned qos_request:1; /* QoS request to prevent deep power shutdown */ + + /* DMA related objects */ + struct dma_chan *chan_fifo0_rx; + struct dma_chan *chan_fifo0_tx; + struct completion dma_complete; }; struct sh_flctl_platform_data { @@ -170,6 +179,9 @@ struct sh_flctl_platform_data { unsigned has_hwecc:1; unsigned use_holden:1; + + unsigned int slave_id_fifo0_tx; + unsigned int slave_id_fifo0_rx; }; static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo) -- cgit v1.2.3 From 9ef525a9141b14d23613faad303cf48a20814f1b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 25 Oct 2012 09:43:10 -0400 Subject: mtd: Fix kernel-doc content to avoid warning. Add missing colons to fix kernel-doc generation warnings. Signed-off-by: Robert P. J. Day Signed-off-by: Artem Bityutskiy --- include/linux/mtd/nand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 24e915957e4f..9d8a6048aacd 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -471,8 +471,8 @@ struct nand_buffers { * non 0 if ONFI supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. - * @onfi_set_features [REPLACEABLE] set the features for ONFI nand - * @onfi_get_features [REPLACEABLE] get the features for ONFI nand + * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand + * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand * @ecclayout: [REPLACEABLE] the default ECC placement scheme * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash -- cgit v1.2.3 From 3e9ce49e0ef95e22790a74720f0068696b2477c9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Oct 2012 22:47:26 +0530 Subject: mtd: map: Fix compilation warning This patch is an attempt to fix following compilation warning. In file included from drivers/mtd/chips/cfi_cmdset_0001.c:35:0: drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wmaybe-uninitialized] I could have used uninitialized_var() too, but didn't used it as the final else part of map_word_load() is missing. So there is a chance that it might be passed uninitialized. Better initialize to zero. Signed-off-by: Viresh Kumar Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3595a0236b0f..56c7936e0c65 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -328,7 +328,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word static inline map_word map_word_load(struct map_info *map, const void *ptr) { - map_word r; + map_word r = {{0} }; if (map_bankwidth_is_1(map)) r.x[0] = *(unsigned char *)ptr; -- cgit v1.2.3 From ebd5ac165f2aaefb767c53112c2010b0ff3df688 Mon Sep 17 00:00:00 2001 From: Shinya Kuribayashi Date: Wed, 24 Oct 2012 19:58:10 +0900 Subject: i2c: i2c-sh_mobile: support I2C hardware block with a faster operating clock On newer SH-/R-Mobile SoCs, a clock supply to the I2C hardware block, which is used to generate the SCL clock output, is getting faster than before, while on the other hand, the SCL clock control registers, ICCH and ICCL, stay unchanged in 9-bit-wide (8+1). On such silicons, the internal SCL clock counter gets incremented every 2 clocks of the operating clock. This patch makes it configurable through platform data. Signed-off-by: Shinya Kuribayashi Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sh_mobile.c | 5 +++++ include/linux/i2c/i2c-sh_mobile.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 4dc0cc3611c2..4c283583bea0 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -120,6 +120,7 @@ struct sh_mobile_i2c_data { void __iomem *reg; struct i2c_adapter adap; unsigned long bus_speed; + unsigned int clks_per_count; struct clk *clk; u_int8_t icic; u_int8_t flags; @@ -231,6 +232,7 @@ static void sh_mobile_i2c_init(struct sh_mobile_i2c_data *pd) /* Get clock rate after clock is enabled */ clk_enable(pd->clk); i2c_clk_khz = clk_get_rate(pd->clk) / 1000; + i2c_clk_khz /= pd->clks_per_count; if (pd->bus_speed == STANDARD_MODE) { tLOW = 47; /* tLOW = 4.7 us */ @@ -658,6 +660,9 @@ static int sh_mobile_i2c_probe(struct platform_device *dev) pd->bus_speed = STANDARD_MODE; if (pdata && pdata->bus_speed) pd->bus_speed = pdata->bus_speed; + pd->clks_per_count = 1; + if (pdata && pdata->clks_per_count) + pd->clks_per_count = pdata->clks_per_count; /* The IIC blocks on SH-Mobile ARM processors * come with two new bits in ICIC. diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h index beda7081aead..06e3089795fb 100644 --- a/include/linux/i2c/i2c-sh_mobile.h +++ b/include/linux/i2c/i2c-sh_mobile.h @@ -5,6 +5,7 @@ struct i2c_sh_mobile_platform_data { unsigned long bus_speed; + unsigned int clks_per_count; }; #endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From d611d41b46c96195b9a168a21992782458826e07 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:27 +0100 Subject: mtd: diskonchip: use inline functions for DocRead/DocWrite The diskonchip drivers traditionally use home-grown macros for doing MMIO accesses, which cause a lot of warnings, at least on ARM machines: drivers/mtd/devices/doc2000.c: In function 'doc_write': drivers/mtd/devices/doc2000.c:854:5: warning: value computed is not used [-Wunused-value] drivers/mtd/devices/doc2000.c: In function 'doc_erase': drivers/mtd/devices/doc2000.c:1123:5: warning: value computed is not used [-Wunused-value drivers/mtd/nand/diskonchip.c: In function 'doc2000_read_byte': drivers/mtd/nand/diskonchip.c:318:3: warning: value computed is not used [-Wunused-value] A nicer solution is to use the architecture-defined I/O accessors. Here, we use the __raw_readl/__raw_writel style, instead of the proper readl/writel ones, in order to preserve the odd semantics of the existing macros that have their own barrier implementation and no byte swap. It would be nice to fix this properly and use the correct accessors as well as make the word size independent from the architecture, but I guess the hardware is obsolete enough that we should better not mess the driver an more than necessary. Signed-off-by: Arnd Bergmann Signed-off-by: Artem Bityutskiy --- include/linux/mtd/doc2000.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h index 0f6fea73a1f6..407d1e556c39 100644 --- a/include/linux/mtd/doc2000.h +++ b/include/linux/mtd/doc2000.h @@ -92,12 +92,26 @@ * Others use readb/writeb */ #if defined(__arm__) -#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)))) -#define WriteDOC_(d, adr, reg) do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0) +static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg) +{ + return __raw_readl(addr + reg); +} +static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg) +{ + __raw_writel(data, addr + reg); + wmb(); +} #define DOC_IOREMAP_LEN 0x8000 #elif defined(__ppc__) -#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)))) -#define WriteDOC_(d, adr, reg) do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0) +static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg) +{ + return __raw_readw(addr + reg); +} +static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg) +{ + __raw_writew(data, addr + reg); + wmb(); +} #define DOC_IOREMAP_LEN 0x4000 #else #define ReadDOC_(adr, reg) readb((void __iomem *)(adr) + (reg)) -- cgit v1.2.3 From 5d27aa5af04f58f3020de1c224dcf8a62151fd58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:28 +0100 Subject: mtd: uninitialized variable warning in map.h The map_word_load() function initializes exactly as many words in the buffer as required, but gcc cannot figure this out and gives a misleading warning. Marking the local variable as uninitialized_var shuts up that warning. Without this patch, building acs5k_defconfig results in: drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_panic_write': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words': include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized] Signed-off-by: Arnd Bergmann Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 56c7936e0c65..f6eb4332ac92 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -391,7 +391,7 @@ static inline map_word map_word_ff(struct map_info *map) static inline map_word inline_map_read(struct map_info *map, unsigned long ofs) { - map_word r; + map_word uninitialized_var(r); if (map_bankwidth_is_1(map)) r.x[0] = __raw_readb(map->virt + ofs); -- cgit v1.2.3 From 33a5f6261a61af28f7b4c86f9f958da0f206c914 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Oct 2012 17:52:56 +0200 Subject: nohz: Add API to check tick state We need some quick way to check if the CPU has stopped its tick. This will be useful to implement the printk tick using the irq work subsystem. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/tick.h | 17 ++++++++++++++++- kernel/time/tick-sched.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f37fceb69b73..2307dd31b966 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -8,6 +8,8 @@ #include #include +#include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); + +static inline int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else + +# else /* !CONFIG_NO_HZ */ +static inline int tick_nohz_tick_stopped(void) +{ + return 0; +} + static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a40260885265..9e945aa090ac 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -28,7 +28,7 @@ /* * Per cpu nohz control structure */ -static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by xtime_lock. -- cgit v1.2.3 From 00b42959106a9ca1c2899e591ae4e9a83ad6af05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Nov 2012 21:03:07 +0100 Subject: irq_work: Don't stop the tick with pending works Don't stop the tick if we have pending irq works on the queue, otherwise if the arch can't raise self-IPIs, we may not find an opportunity to execute the pending works for a while. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/irq_work.h | 6 ++++++ kernel/irq_work.c | 11 +++++++++++ kernel/time/tick-sched.c | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6a9e8f5399e2..a69704f37204 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -20,4 +20,10 @@ bool irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); +#ifdef CONFIG_IRQ_WORK +bool irq_work_needs_cpu(void); +#else +static bool irq_work_needs_cpu(void) { return false; } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 64eddd59ed83..b3c113a14727 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -99,6 +99,17 @@ bool irq_work_queue(struct irq_work *work) } EXPORT_SYMBOL_GPL(irq_work_queue); +bool irq_work_needs_cpu(void) +{ + struct llist_head *this_list; + + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) + return false; + + return true; +} + /* * Run the irq_work entries on this cpu. Requires to be ran from hardirq * context with local IRQs disabled. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9e945aa090ac..f249e8c3e58e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -289,7 +290,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&xtime_lock, seq)); if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || - arch_needs_cpu(cpu)) { + arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { -- cgit v1.2.3 From 60a8f428320918458a9a21052777eada68eebfd8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:17 -0800 Subject: x86, mm: Move after_bootmem to mm_internel.h it is only used in arch/x86/mm/init*.c Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-41-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/mm_internal.h | 2 ++ include/linux/mm.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index dc79ac121774..6b563a118891 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -14,4 +14,6 @@ unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long page_size_mask); void zone_sizes_init(void); +extern int after_bootmem; + #endif /* __X86_MM_INTERNAL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index bcaab4e6fe91..64d5271a3d36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1355,7 +1355,6 @@ extern void __init mmap_init(void); extern void show_mem(unsigned int flags); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -extern int after_bootmem; extern __printf(3, 4) void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); -- cgit v1.2.3 From bc6679aef673f9dcb8f718528fc3df49ff661af9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 19 Oct 2012 16:43:41 -0400 Subject: irq_work: Make self-IPIs optable On irq work initialization, let the user choose to define it as "lazy" or not. "Lazy" means that we don't want to send an IPI (provided the arch can anyway) when we enqueue this work but we rather prefer to wait for the next timer tick to execute our work if possible. This is going to be a benefit for non-urgent enqueuers (like printk in the future) that may prefer not to raise an IPI storm in case of frequent enqueuing on short periods of time. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/irq_work.h | 14 ++++++++++++++ kernel/irq_work.c | 47 +++++++++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index a69704f37204..b28eb60c8bf6 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,20 @@ #include +/* + * An entry can be in one of four states: + * + * free NULL, 0 -> {claimed} : free to be used + * claimed NULL, 3 -> {pending} : claimed to be enqueued + * pending next, 3 -> {busy} : queued, pending callback + * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed + */ + +#define IRQ_WORK_PENDING 1UL +#define IRQ_WORK_BUSY 2UL +#define IRQ_WORK_FLAGS 3UL +#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ + struct irq_work { unsigned long flags; struct llist_node llnode; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 480f74715ba9..7f3a59bc8e3d 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -12,24 +12,15 @@ #include #include #include +#include +#include #include #include #include -/* - * An entry can be in one of four states: - * - * free NULL, 0 -> {claimed} : free to be used - * claimed NULL, 3 -> {pending} : claimed to be enqueued - * pending next, 3 -> {busy} : queued, pending callback - * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - */ - -#define IRQ_WORK_PENDING 1UL -#define IRQ_WORK_BUSY 2UL -#define IRQ_WORK_FLAGS 3UL static DEFINE_PER_CPU(struct llist_head, irq_work_list); +static DEFINE_PER_CPU(int, irq_work_raised); /* * Claim the entry so that no one else will poke at it. @@ -69,14 +60,19 @@ void __weak arch_irq_work_raise(void) */ static void __irq_work_queue(struct irq_work *work) { - bool empty; - preempt_disable(); - empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); - /* The list was empty, raise self-interrupt to start processing. */ - if (empty) - arch_irq_work_raise(); + llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); + + /* + * If the work is not "lazy" or the tick is stopped, raise the irq + * work interrupt (if supported by the arch), otherwise, just wait + * for the next tick. + */ + if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { + if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) + arch_irq_work_raise(); + } preempt_enable(); } @@ -117,10 +113,19 @@ bool irq_work_needs_cpu(void) static void __irq_work_run(void) { + unsigned long flags; struct irq_work *work; struct llist_head *this_list; struct llist_node *llnode; + + /* + * Reset the "raised" state right before we check the list because + * an NMI may enqueue after we find the list empty from the runner. + */ + __this_cpu_write(irq_work_raised, 0); + barrier(); + this_list = &__get_cpu_var(irq_work_list); if (llist_empty(this_list)) return; @@ -140,13 +145,15 @@ static void __irq_work_run(void) * to claim that work don't rely on us to handle their data * while we are in the middle of the func. */ - xchg(&work->flags, IRQ_WORK_BUSY); + flags = work->flags & ~IRQ_WORK_PENDING; + xchg(&work->flags, flags); + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); + (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } -- cgit v1.2.3 From 74876a98a87a115254b3a66a14b27320b7f0acaa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Oct 2012 18:00:23 +0200 Subject: printk: Wake up klogd using irq_work klogd is woken up asynchronously from the tick in order to do it safely. However if printk is called when the tick is stopped, the reader won't be woken up until the next interrupt, which might not fire for a while. As a result, the user may miss some message. To fix this, lets implement the printk tick using a lazy irq work. This subsystem takes care of the timer tick state and can fix up accordingly. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Cc: Paul Gortmaker --- include/linux/printk.h | 3 --- init/Kconfig | 1 + kernel/printk.c | 36 ++++++++++++++++++++---------------- kernel/time/tick-sched.c | 2 +- kernel/timer.c | 1 - 5 files changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9afc01e5a0a6..86c4b6294713 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...) extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); -extern int printk_needs_cpu(int cpu); -extern void printk_tick(void); - #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) int vprintk_emit(int facility, int level, diff --git a/init/Kconfig b/init/Kconfig index cdc152c75727..c575566be47d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1196,6 +1196,7 @@ config HOTPLUG config PRINTK default y bool "Enable support for printk" if EXPERT + select IRQ_WORK help This option enables normal printk support. Removing it eliminates most of the message strings from the kernel image diff --git a/kernel/printk.c b/kernel/printk.c index 2d607f4d1797..c9104feba5ec 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -1955,30 +1956,32 @@ int is_console_locked(void) static DEFINE_PER_CPU(int, printk_pending); static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); -void printk_tick(void) +static void wake_up_klogd_work_func(struct irq_work *irq_work) { - if (__this_cpu_read(printk_pending)) { - int pending = __this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); } -} -int printk_needs_cpu(int cpu) -{ - if (cpu_is_offline(cpu)) - printk_tick(); - return __this_cpu_read(printk_pending); + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); } +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + void wake_up_klogd(void) { - if (waitqueue_active(&log_wait)) + preempt_disable(); + if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); } static void console_cont_flush(char *text, size_t size) @@ -2458,6 +2461,7 @@ int printk_sched(const char *fmt, ...) va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); local_irq_restore(flags); return r; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f249e8c3e58e..822d7572bf2d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -289,7 +289,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; diff --git a/kernel/timer.c b/kernel/timer.c index 367d00858482..ff3b5165737b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1351,7 +1351,6 @@ void update_process_times(int user_tick) account_process_tick(p, user_tick); run_local_timers(); rcu_check_callbacks(cpu, user_tick); - printk_tick(); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_run(); -- cgit v1.2.3 From ea2ce92e44dc83b7a69c2aedd9c52bfe7fee1a62 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Tue, 9 Oct 2012 22:25:29 +0530 Subject: power_supply: Add support for CHARGE_CONTROL_* attributes Add support for power supply attributes CHARGE_CONTROL_LIMIT and CHARGE_CONTROL_LIMIT_MAX. These new attributes will enable the user space to implement custom charging algorithms based on platform state. Signed-off-by: Ramakrishna Pallala Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 3 +++ drivers/power/power_supply_sysfs.c | 2 ++ include/linux/power_supply.h | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 9c647bd7c5a9..3f10b39b0346 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -123,6 +123,9 @@ CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the power supply object. +CHARGE_CONTROL_LIMIT - current charge control limit setting +CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting + ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. CAPACITY - capacity in percents. diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 395c2cfa16c0..40fa3b7cae54 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -164,6 +164,8 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(constant_charge_current_max), POWER_SUPPLY_ATTR(constant_charge_voltage), POWER_SUPPLY_ATTR(constant_charge_voltage_max), + POWER_SUPPLY_ATTR(charge_control_limit), + POWER_SUPPLY_ATTR(charge_control_limit_max), POWER_SUPPLY_ATTR(energy_full_design), POWER_SUPPLY_ATTR(energy_empty_design), POWER_SUPPLY_ATTR(energy_full), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index e5ef45834c3c..445b4b249af5 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -114,6 +114,8 @@ enum power_supply_property { POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN, POWER_SUPPLY_PROP_ENERGY_FULL, -- cgit v1.2.3 From 952aeeb3ee28bc12a70744e40636de40688eb60d Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Tue, 9 Oct 2012 22:25:59 +0530 Subject: power_supply: Register power supply for thermal cooling device This patch registers the power supply as a cooling device if the power supply has support for charge throttling. Now with this change low level drivers need not register with thermal framework as it is automatically done by power supply framework. Signed-off-by: Ramakrishna Pallala Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 96 +++++++++++++++++++++++++++++++++++++++ include/linux/power_supply.h | 1 + 2 files changed, 97 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 2436f1350013..f984da1066ec 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -216,6 +216,86 @@ static void psy_unregister_thermal(struct power_supply *psy) return; thermal_zone_device_unregister(psy->tzd); } + +/* thermal cooling device callbacks */ +static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd, + unsigned long *state) +{ + struct power_supply *psy; + union power_supply_propval val; + int ret; + + psy = tcd->devdata; + ret = psy->get_property(psy, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val); + if (!ret) + *state = val.intval; + + return ret; +} + +static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd, + unsigned long *state) +{ + struct power_supply *psy; + union power_supply_propval val; + int ret; + + psy = tcd->devdata; + ret = psy->get_property(psy, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val); + if (!ret) + *state = val.intval; + + return ret; +} + +static int ps_set_cur_charge_cntl_limit(struct thermal_cooling_device *tcd, + unsigned long state) +{ + struct power_supply *psy; + union power_supply_propval val; + int ret; + + psy = tcd->devdata; + val.intval = state; + ret = psy->set_property(psy, + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val); + + return ret; +} + +static struct thermal_cooling_device_ops psy_tcd_ops = { + .get_max_state = ps_get_max_charge_cntl_limit, + .get_cur_state = ps_get_cur_chrage_cntl_limit, + .set_cur_state = ps_set_cur_charge_cntl_limit, +}; + +static int psy_register_cooler(struct power_supply *psy) +{ + int i; + + /* Register for cooling device if psy can control charging */ + for (i = 0; i < psy->num_properties; i++) { + if (psy->properties[i] == + POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT) { + psy->tcd = thermal_cooling_device_register( + (char *)psy->name, + psy, &psy_tcd_ops); + if (IS_ERR(psy->tcd)) + return PTR_ERR(psy->tcd); + break; + } + } + return 0; +} + +static void psy_unregister_cooler(struct power_supply *psy) +{ + if (IS_ERR_OR_NULL(psy->tcd)) + return; + thermal_cooling_device_unregister(psy->tcd); +} #else static int psy_register_thermal(struct power_supply *psy) { @@ -225,6 +305,15 @@ static int psy_register_thermal(struct power_supply *psy) static void psy_unregister_thermal(struct power_supply *psy) { } + +static int psy_register_cooler(struct power_supply *psy) +{ + return 0; +} + +static void psy_unregister_cooler(struct power_supply *psy) +{ +} #endif int power_supply_register(struct device *parent, struct power_supply *psy) @@ -259,6 +348,10 @@ int power_supply_register(struct device *parent, struct power_supply *psy) if (rc) goto register_thermal_failed; + rc = psy_register_cooler(psy); + if (rc) + goto register_cooler_failed; + rc = power_supply_create_triggers(psy); if (rc) goto create_triggers_failed; @@ -268,6 +361,8 @@ int power_supply_register(struct device *parent, struct power_supply *psy) goto success; create_triggers_failed: + psy_unregister_cooler(psy); +register_cooler_failed: psy_unregister_thermal(psy); register_thermal_failed: device_del(dev); @@ -284,6 +379,7 @@ void power_supply_unregister(struct power_supply *psy) cancel_work_sync(&psy->changed_work); sysfs_remove_link(&psy->dev->kobj, "powers"); power_supply_remove_triggers(psy); + psy_unregister_cooler(psy); psy_unregister_thermal(psy); device_unregister(psy->dev); } diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 445b4b249af5..1f0ab90aff00 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -188,6 +188,7 @@ struct power_supply { struct work_struct changed_work; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; + struct thermal_cooling_device *tcd; #endif #ifdef CONFIG_LEDS_TRIGGERS -- cgit v1.2.3 From 08d816b8cb09de1fd8268c8f1dbc97c3cc435d67 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 22 Oct 2012 00:18:54 +0000 Subject: lp8788-charger: Fix ADC channel names The name of ADC channel is configurable in the platform side. This name is referenced in the IIO consumer driver. To get the IIO channel, specific name in the platform data is used as an parameter of the iio_channel_get(). Thus, lp8788_adc_id platform data are replaced with specific names. Signed-off-by: Milo(Woogyom) Kim Signed-off-by: Anton Vorontsov --- drivers/power/lp8788-charger.c | 44 ++++++------------------------------------ include/linux/mfd/lp8788.h | 8 ++++---- 2 files changed, 10 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c index 1afa5f7a5359..fb592bfbec6e 100644 --- a/drivers/power/lp8788-charger.c +++ b/drivers/power/lp8788-charger.c @@ -584,50 +584,18 @@ static void lp8788_setup_adc_channel(const char *consumer_name, struct lp8788_charger *pchg) { struct lp8788_charger_platform_data *pdata = pchg->pdata; - struct device *dev = pchg->lp->dev; struct iio_channel *chan; - enum lp8788_adc_id id; - const char *chan_name[LPADC_MAX] = { - [LPADC_VBATT_5P5] = "vbatt-5p5", - [LPADC_VBATT_6P0] = "vbatt-6p0", - [LPADC_VBATT_5P0] = "vbatt-5p0", - [LPADC_ADC1] = "adc1", - [LPADC_ADC2] = "adc2", - [LPADC_ADC3] = "adc3", - [LPADC_ADC4] = "adc4", - }; if (!pdata) return; - id = pdata->vbatt_adc; - switch (id) { - case LPADC_VBATT_5P5: - case LPADC_VBATT_6P0: - case LPADC_VBATT_5P0: - chan = iio_channel_get(consumer_name, chan_name[id]); - pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan; - break; - default: - dev_err(dev, "invalid ADC id for VBATT: %d\n", id); - pchg->chan[LP8788_VBATT] = NULL; - break; - } + /* ADC channel for battery voltage */ + chan = iio_channel_get(consumer_name, pdata->adc_vbatt); + pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan; - id = pdata->batt_temp_adc; - switch (id) { - case LPADC_ADC1: - case LPADC_ADC2: - case LPADC_ADC3: - case LPADC_ADC4: - chan = iio_channel_get(consumer_name, chan_name[id]); - pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan; - break; - default: - dev_err(dev, "invalid ADC id for BATT_TEMP : %d\n", id); - pchg->chan[LP8788_BATT_TEMP] = NULL; - break; - } + /* ADC channel for battery temperature */ + chan = iio_channel_get(consumer_name, pdata->adc_batt_temp); + pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan; } static void lp8788_release_adc_channel(struct lp8788_charger *pchg) diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index cec364bdccfa..2a32b16f79cb 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -211,16 +211,16 @@ struct lp8788_chg_param { /* * struct lp8788_charger_platform_data - * @vbatt_adc : adc selection id for battery voltage - * @batt_temp_adc : adc selection id for battery temperature + * @adc_vbatt : adc channel name for battery voltage + * @adc_batt_temp : adc channel name for battery temperature * @max_vbatt_mv : used for calculating battery capacity * @chg_params : initial charging parameters * @num_chg_params : numbers of charging parameters * @charger_event : the charger event can be reported to the platform side */ struct lp8788_charger_platform_data { - enum lp8788_adc_id vbatt_adc; - enum lp8788_adc_id batt_temp_adc; + const char *adc_vbatt; + const char *adc_batt_temp; unsigned int max_vbatt_mv; struct lp8788_chg_param *chg_params; int num_chg_params; -- cgit v1.2.3 From 0bc98cc6155205b615a9d29a2d54d1b839521c04 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Fri, 2 Nov 2012 20:18:11 +0100 Subject: power_supply: Add bq2415x charger driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bq2415x charger driver is needed for example on Nokia N900 for charging battery. Driver is part of open source project to replace proprietary battery management. Signed-off-by: Pali Rohár Signed-off-by: Anton Vorontsov --- drivers/power/bq2415x_charger.c | 1644 +++++++++++++++++++++++++++++++++ include/linux/power/bq2415x_charger.h | 90 ++ 2 files changed, 1734 insertions(+) create mode 100644 drivers/power/bq2415x_charger.c create mode 100644 include/linux/power/bq2415x_charger.h (limited to 'include/linux') diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c new file mode 100644 index 000000000000..017b3c27f769 --- /dev/null +++ b/drivers/power/bq2415x_charger.c @@ -0,0 +1,1644 @@ +/* + bq2415x_charger.c - bq2415x charger driver + Copyright (C) 2011-2012 Pali Rohár + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +/* + Datasheets: + http://www.ti.com/product/bq24150 + http://www.ti.com/product/bq24150a + http://www.ti.com/product/bq24152 + http://www.ti.com/product/bq24153 + http://www.ti.com/product/bq24153a + http://www.ti.com/product/bq24155 +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* timeout for resetting chip timer */ +#define BQ2415X_TIMER_TIMEOUT 10 + +#define BQ2415X_REG_STATUS 0x00 +#define BQ2415X_REG_CONTROL 0x01 +#define BQ2415X_REG_VOLTAGE 0x02 +#define BQ2415X_REG_VENDER 0x03 +#define BQ2415X_REG_CURRENT 0x04 + +/* reset state for all registers */ +#define BQ2415X_RESET_STATUS BIT(6) +#define BQ2415X_RESET_CONTROL (BIT(4)|BIT(5)) +#define BQ2415X_RESET_VOLTAGE (BIT(1)|BIT(3)) +#define BQ2415X_RESET_CURRENT (BIT(0)|BIT(3)|BIT(7)) + +/* status register */ +#define BQ2415X_BIT_TMR_RST 7 +#define BQ2415X_BIT_OTG 7 +#define BQ2415X_BIT_EN_STAT 6 +#define BQ2415X_MASK_STAT (BIT(4)|BIT(5)) +#define BQ2415X_SHIFT_STAT 4 +#define BQ2415X_BIT_BOOST 3 +#define BQ2415X_MASK_FAULT (BIT(0)|BIT(1)|BIT(2)) +#define BQ2415X_SHIFT_FAULT 0 + +/* control register */ +#define BQ2415X_MASK_LIMIT (BIT(6)|BIT(7)) +#define BQ2415X_SHIFT_LIMIT 6 +#define BQ2415X_MASK_VLOWV (BIT(4)|BIT(5)) +#define BQ2415X_SHIFT_VLOWV 4 +#define BQ2415X_BIT_TE 3 +#define BQ2415X_BIT_CE 2 +#define BQ2415X_BIT_HZ_MODE 1 +#define BQ2415X_BIT_OPA_MODE 0 + +/* voltage register */ +#define BQ2415X_MASK_VO (BIT(2)|BIT(3)|BIT(4)|BIT(5)|BIT(6)|BIT(7)) +#define BQ2415X_SHIFT_VO 2 +#define BQ2415X_BIT_OTG_PL 1 +#define BQ2415X_BIT_OTG_EN 0 + +/* vender register */ +#define BQ2415X_MASK_VENDER (BIT(5)|BIT(6)|BIT(7)) +#define BQ2415X_SHIFT_VENDER 5 +#define BQ2415X_MASK_PN (BIT(3)|BIT(4)) +#define BQ2415X_SHIFT_PN 3 +#define BQ2415X_MASK_REVISION (BIT(0)|BIT(1)|BIT(2)) +#define BQ2415X_SHIFT_REVISION 0 + +/* current register */ +#define BQ2415X_MASK_RESET BIT(7) +#define BQ2415X_MASK_VI_CHRG (BIT(4)|BIT(5)|BIT(6)) +#define BQ2415X_SHIFT_VI_CHRG 4 +/* N/A BIT(3) */ +#define BQ2415X_MASK_VI_TERM (BIT(0)|BIT(1)|BIT(2)) +#define BQ2415X_SHIFT_VI_TERM 0 + + +enum bq2415x_command { + BQ2415X_TIMER_RESET, + BQ2415X_OTG_STATUS, + BQ2415X_STAT_PIN_STATUS, + BQ2415X_STAT_PIN_ENABLE, + BQ2415X_STAT_PIN_DISABLE, + BQ2415X_CHARGE_STATUS, + BQ2415X_BOOST_STATUS, + BQ2415X_FAULT_STATUS, + + BQ2415X_CHARGE_TERMINATION_STATUS, + BQ2415X_CHARGE_TERMINATION_ENABLE, + BQ2415X_CHARGE_TERMINATION_DISABLE, + BQ2415X_CHARGER_STATUS, + BQ2415X_CHARGER_ENABLE, + BQ2415X_CHARGER_DISABLE, + BQ2415X_HIGH_IMPEDANCE_STATUS, + BQ2415X_HIGH_IMPEDANCE_ENABLE, + BQ2415X_HIGH_IMPEDANCE_DISABLE, + BQ2415X_BOOST_MODE_STATUS, + BQ2415X_BOOST_MODE_ENABLE, + BQ2415X_BOOST_MODE_DISABLE, + + BQ2415X_OTG_LEVEL, + BQ2415X_OTG_ACTIVATE_HIGH, + BQ2415X_OTG_ACTIVATE_LOW, + BQ2415X_OTG_PIN_STATUS, + BQ2415X_OTG_PIN_ENABLE, + BQ2415X_OTG_PIN_DISABLE, + + BQ2415X_VENDER_CODE, + BQ2415X_PART_NUMBER, + BQ2415X_REVISION, +}; + +enum bq2415x_chip { + BQUNKNOWN, + BQ24150, + BQ24150A, + BQ24151, + BQ24151A, + BQ24152, + BQ24153, + BQ24153A, + BQ24155, + BQ24156, + BQ24156A, + BQ24158, +}; + +static char *bq2415x_chip_name[] = { + "unknown", + "bq24150", + "bq24150a", + "bq24151", + "bq24151a", + "bq24152", + "bq24153", + "bq24153a", + "bq24155", + "bq24156", + "bq24156a", + "bq24158", +}; + +struct bq2415x_device { + struct device *dev; + struct bq2415x_platform_data init_data; + struct power_supply charger; + struct delayed_work work; + enum bq2415x_mode reported_mode;/* mode reported by hook function */ + enum bq2415x_mode mode; /* current configured mode */ + enum bq2415x_chip chip; + const char *timer_error; + char *model; + char *name; + int autotimer; /* 1 - if driver automatically reset timer, 0 - not */ + int automode; /* 1 - enabled, 0 - disabled; -1 - not supported */ + int id; +}; + +/* each registered chip must have unique id */ +static DEFINE_IDR(bq2415x_id); + +static DEFINE_MUTEX(bq2415x_id_mutex); +static DEFINE_MUTEX(bq2415x_timer_mutex); +static DEFINE_MUTEX(bq2415x_i2c_mutex); + +/**** i2c read functions ****/ + +/* read value from register */ +static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg) +{ + struct i2c_client *client = to_i2c_client(bq->dev); + struct i2c_msg msg[2]; + u8 val; + int ret; + + if (!client->adapter) + return -ENODEV; + + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].buf = ® + msg[0].len = sizeof(reg); + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].buf = &val; + msg[1].len = sizeof(val); + + mutex_lock(&bq2415x_i2c_mutex); + ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + mutex_unlock(&bq2415x_i2c_mutex); + + if (ret < 0) + return ret; + + return val; +} + +/* read value from register, apply mask and right shift it */ +static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg, + u8 mask, u8 shift) +{ + int ret; + + if (shift > 8) + return -EINVAL; + + ret = bq2415x_i2c_read(bq, reg); + if (ret < 0) + return ret; + else + return (ret & mask) >> shift; +} + +/* read value from register and return one specified bit */ +static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit) +{ + if (bit > 8) + return -EINVAL; + else + return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit); +} + +/**** i2c write functions ****/ + +/* write value to register */ +static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val) +{ + struct i2c_client *client = to_i2c_client(bq->dev); + struct i2c_msg msg[1]; + u8 data[2]; + int ret; + + data[0] = reg; + data[1] = val; + + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].buf = data; + msg[0].len = ARRAY_SIZE(data); + + mutex_lock(&bq2415x_i2c_mutex); + ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + mutex_unlock(&bq2415x_i2c_mutex); + + /* i2c_transfer returns number of messages transferred */ + if (ret < 0) + return ret; + else if (ret != 1) + return -EIO; + + return 0; +} + +/* read value from register, change it with mask left shifted and write back */ +static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val, + u8 mask, u8 shift) +{ + int ret; + + if (shift > 8) + return -EINVAL; + + ret = bq2415x_i2c_read(bq, reg); + if (ret < 0) + return ret; + + ret &= ~mask; + ret |= val << shift; + + return bq2415x_i2c_write(bq, reg, ret); +} + +/* change only one bit in register */ +static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg, + bool val, u8 bit) +{ + if (bit > 8) + return -EINVAL; + else + return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit); +} + +/**** global functions ****/ + +/* exec command function */ +static int bq2415x_exec_command(struct bq2415x_device *bq, + enum bq2415x_command command) +{ + int ret; + switch (command) { + case BQ2415X_TIMER_RESET: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, + 1, BQ2415X_BIT_TMR_RST); + case BQ2415X_OTG_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS, + BQ2415X_BIT_OTG); + case BQ2415X_STAT_PIN_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS, + BQ2415X_BIT_EN_STAT); + case BQ2415X_STAT_PIN_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 1, + BQ2415X_BIT_EN_STAT); + case BQ2415X_STAT_PIN_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 0, + BQ2415X_BIT_EN_STAT); + case BQ2415X_CHARGE_STATUS: + return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS, + BQ2415X_MASK_STAT, BQ2415X_SHIFT_STAT); + case BQ2415X_BOOST_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS, + BQ2415X_BIT_BOOST); + case BQ2415X_FAULT_STATUS: + return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS, + BQ2415X_MASK_FAULT, BQ2415X_SHIFT_FAULT); + + case BQ2415X_CHARGE_TERMINATION_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL, + BQ2415X_BIT_TE); + case BQ2415X_CHARGE_TERMINATION_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 1, BQ2415X_BIT_TE); + case BQ2415X_CHARGE_TERMINATION_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 0, BQ2415X_BIT_TE); + case BQ2415X_CHARGER_STATUS: + ret = bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL, + BQ2415X_BIT_CE); + if (ret < 0) + return ret; + else + return ret > 0 ? 0 : 1; + case BQ2415X_CHARGER_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 0, BQ2415X_BIT_CE); + case BQ2415X_CHARGER_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 1, BQ2415X_BIT_CE); + case BQ2415X_HIGH_IMPEDANCE_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL, + BQ2415X_BIT_HZ_MODE); + case BQ2415X_HIGH_IMPEDANCE_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 1, BQ2415X_BIT_HZ_MODE); + case BQ2415X_HIGH_IMPEDANCE_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 0, BQ2415X_BIT_HZ_MODE); + case BQ2415X_BOOST_MODE_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL, + BQ2415X_BIT_OPA_MODE); + case BQ2415X_BOOST_MODE_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 1, BQ2415X_BIT_OPA_MODE); + case BQ2415X_BOOST_MODE_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL, + 0, BQ2415X_BIT_OPA_MODE); + + case BQ2415X_OTG_LEVEL: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE, + BQ2415X_BIT_OTG_PL); + case BQ2415X_OTG_ACTIVATE_HIGH: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE, + 1, BQ2415X_BIT_OTG_PL); + case BQ2415X_OTG_ACTIVATE_LOW: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE, + 0, BQ2415X_BIT_OTG_PL); + case BQ2415X_OTG_PIN_STATUS: + return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE, + BQ2415X_BIT_OTG_EN); + case BQ2415X_OTG_PIN_ENABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE, + 1, BQ2415X_BIT_OTG_EN); + case BQ2415X_OTG_PIN_DISABLE: + return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE, + 0, BQ2415X_BIT_OTG_EN); + + case BQ2415X_VENDER_CODE: + return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER, + BQ2415X_MASK_VENDER, BQ2415X_SHIFT_VENDER); + case BQ2415X_PART_NUMBER: + return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER, + BQ2415X_MASK_PN, BQ2415X_SHIFT_PN); + case BQ2415X_REVISION: + return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER, + BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION); + + default: + return -EINVAL; + } +} + +/* detect chip type */ +static enum bq2415x_chip bq2415x_detect_chip(struct bq2415x_device *bq) +{ + struct i2c_client *client = to_i2c_client(bq->dev); + int ret = bq2415x_exec_command(bq, BQ2415X_PART_NUMBER); + + if (ret < 0) + return ret; + + switch (client->addr) { + case 0x6b: + switch (ret) { + case 0: + if (bq->chip == BQ24151A) + return bq->chip; + else + return BQ24151; + case 1: + if (bq->chip == BQ24150A || + bq->chip == BQ24152 || + bq->chip == BQ24155) + return bq->chip; + else + return BQ24150; + case 2: + if (bq->chip == BQ24153A) + return bq->chip; + else + return BQ24153; + default: + return BQUNKNOWN; + } + break; + + case 0x6a: + switch (ret) { + case 0: + if (bq->chip == BQ24156A) + return bq->chip; + else + return BQ24156; + case 2: + return BQ24158; + default: + return BQUNKNOWN; + } + break; + } + + return BQUNKNOWN; +} + +/* detect chip revision */ +static int bq2415x_detect_revision(struct bq2415x_device *bq) +{ + int ret = bq2415x_exec_command(bq, BQ2415X_REVISION); + int chip = bq2415x_detect_chip(bq); + if (ret < 0 || chip < 0) + return -1; + + switch (chip) { + case BQ24150: + case BQ24150A: + case BQ24151: + case BQ24151A: + case BQ24152: + if (ret >= 0 && ret <= 3) + return ret; + else + return -1; + + case BQ24153: + case BQ24153A: + case BQ24156: + case BQ24156A: + case BQ24158: + if (ret == 3) + return 0; + else if (ret == 1) + return 1; + else + return -1; + + case BQ24155: + if (ret == 3) + return 3; + else + return -1; + + case BQUNKNOWN: + return -1; + } + + return -1; +} + +/* return chip vender code */ +static int bq2415x_get_vender_code(struct bq2415x_device *bq) +{ + int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE); + if (ret < 0) + return 0; + else /* convert to binary */ + return (ret & 0x1) + + ((ret >> 1) & 0x1) * 10 + + ((ret >> 2) & 0x1) * 100; +} + +/* reset all chip registers to default state */ +static void bq2415x_reset_chip(struct bq2415x_device *bq) +{ + bq2415x_i2c_write(bq, BQ2415X_REG_CURRENT, BQ2415X_RESET_CURRENT); + bq2415x_i2c_write(bq, BQ2415X_REG_VOLTAGE, BQ2415X_RESET_VOLTAGE); + bq2415x_i2c_write(bq, BQ2415X_REG_CONTROL, BQ2415X_RESET_CONTROL); + bq2415x_i2c_write(bq, BQ2415X_REG_STATUS, BQ2415X_RESET_STATUS); + bq->timer_error = NULL; +} + +/**** properties functions ****/ + +/* set current limit in mA */ +static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA) +{ + int val; + if (mA <= 100) + val = 0; + else if (mA <= 500) + val = 1; + else if (mA <= 800) + val = 2; + else + val = 3; + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val, + BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT); +} + +/* get current limit in mA */ +static int bq2415x_get_current_limit(struct bq2415x_device *bq) +{ + int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, + BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT); + if (ret < 0) + return ret; + else if (ret == 0) + return 100; + else if (ret == 1) + return 500; + else if (ret == 2) + return 800; + else if (ret == 3) + return 1800; + else + return -EINVAL; +} + +/* set weak battery voltage in mV */ +static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV) +{ + /* round to 100mV */ + int val; + if (mV <= 3400 + 50) + val = 0; + else if (mV <= 3500 + 50) + val = 1; + else if (mV <= 3600 + 50) + val = 2; + else + val = 3; + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val, + BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV); +} + +/* get weak battery voltage in mV */ +static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq) +{ + int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, + BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV); + if (ret < 0) + return ret; + else + return 100 * (34 + ret); +} + +/* set battery regulation voltage in mV */ +static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq, + int mV) +{ + int val = (mV/10 - 350) / 2; + + if (val < 0) + val = 0; + else if (val > 94) /* FIXME: Max is 94 or 122 ? Set max value ? */ + return -EINVAL; + + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_VOLTAGE, val, + BQ2415X_MASK_VO, BQ2415X_SHIFT_VO); +} + +/* get battery regulation voltage in mV */ +static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq) +{ + int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE, + BQ2415X_MASK_VO, BQ2415X_SHIFT_VO); + if (ret < 0) + return ret; + else + return 10 * (350 + 2*ret); +} + +/* set charge current in mA (platform data must provide resistor sense) */ +static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA) +{ + int val; + if (bq->init_data.resistor_sense <= 0) + return -ENOSYS; + + val = (mA * bq->init_data.resistor_sense - 37400) / 6800; + + if (val < 0) + val = 0; + else if (val > 7) + val = 7; + + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val, + BQ2415X_MASK_VI_CHRG | BQ2415X_MASK_RESET, + BQ2415X_SHIFT_VI_CHRG); +} + +/* get charge current in mA (platform data must provide resistor sense) */ +static int bq2415x_get_charge_current(struct bq2415x_device *bq) +{ + int ret; + if (bq->init_data.resistor_sense <= 0) + return -ENOSYS; + + ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT, + BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG); + if (ret < 0) + return ret; + else + return (37400 + 6800*ret) / bq->init_data.resistor_sense; +} + +/* set termination current in mA (platform data must provide resistor sense) */ +static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA) +{ + int val; + if (bq->init_data.resistor_sense <= 0) + return -ENOSYS; + + val = (mA * bq->init_data.resistor_sense - 3400) / 3400; + + if (val < 0) + val = 0; + else if (val > 7) + val = 7; + + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val, + BQ2415X_MASK_VI_TERM | BQ2415X_MASK_RESET, + BQ2415X_SHIFT_VI_TERM); +} + +/* get termination current in mA (platform data must provide resistor sense) */ +static int bq2415x_get_termination_current(struct bq2415x_device *bq) +{ + int ret; + if (bq->init_data.resistor_sense <= 0) + return -ENOSYS; + + ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT, + BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM); + if (ret < 0) + return ret; + else + return (3400 + 3400*ret) / bq->init_data.resistor_sense; +} + +/* set default value of property */ +#define bq2415x_set_default_value(bq, prop) \ + do { \ + int ret = 0; \ + if (bq->init_data.prop != -1) \ + ret = bq2415x_set_##prop(bq, bq->init_data.prop); \ + if (ret < 0) \ + return ret; \ + } while (0) + +/* set default values of all properties */ +static int bq2415x_set_defaults(struct bq2415x_device *bq) +{ + bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE); + bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE); + bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE); + bq2415x_set_default_value(bq, current_limit); + bq2415x_set_default_value(bq, weak_battery_voltage); + bq2415x_set_default_value(bq, battery_regulation_voltage); + if (bq->init_data.resistor_sense > 0) { + bq2415x_set_default_value(bq, charge_current); + bq2415x_set_default_value(bq, termination_current); + bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE); + } + bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE); + return 0; +} + +/**** charger mode functions ****/ + +/* set charger mode */ +static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode) +{ + int ret = 0; + int charger = 0; + int boost = 0; + + if (mode == BQ2415X_MODE_HOST_CHARGER || + mode == BQ2415X_MODE_DEDICATED_CHARGER) + charger = 1; + + if (mode == BQ2415X_MODE_BOOST) + boost = 1; + + if (!charger) + ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE); + + if (!boost) + ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE); + + if (ret < 0) + return ret; + + switch (mode) { + case BQ2415X_MODE_NONE: + dev_dbg(bq->dev, "changing mode to: N/A\n"); + ret = bq2415x_set_current_limit(bq, 100); + break; + case BQ2415X_MODE_HOST_CHARGER: + dev_dbg(bq->dev, "changing mode to: Host/HUB charger\n"); + ret = bq2415x_set_current_limit(bq, 500); + break; + case BQ2415X_MODE_DEDICATED_CHARGER: + dev_dbg(bq->dev, "changing mode to: Dedicated charger\n"); + ret = bq2415x_set_current_limit(bq, 1800); + break; + case BQ2415X_MODE_BOOST: /* Boost mode */ + dev_dbg(bq->dev, "changing mode to: Boost\n"); + ret = bq2415x_set_current_limit(bq, 100); + break; + } + + if (ret < 0) + return ret; + + if (charger) + ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE); + else if (boost) + ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_ENABLE); + + if (ret < 0) + return ret; + + bq2415x_set_default_value(bq, weak_battery_voltage); + bq2415x_set_default_value(bq, battery_regulation_voltage); + + bq->mode = mode; + sysfs_notify(&bq->charger.dev->kobj, NULL, "mode"); + + return 0; + +} + +/* hook function called by other driver which set reported mode */ +static void bq2415x_hook_function(enum bq2415x_mode mode, void *data) +{ + struct bq2415x_device *bq = data; + + if (!bq) + return; + + dev_dbg(bq->dev, "hook function was called\n"); + bq->reported_mode = mode; + + /* if automode is not enabled do not tell about reported_mode */ + if (bq->automode < 1) + return; + + sysfs_notify(&bq->charger.dev->kobj, NULL, "reported_mode"); + bq2415x_set_mode(bq, bq->reported_mode); + +} + +/**** timer functions ****/ + +/* enable/disable auto resetting chip timer */ +static void bq2415x_set_autotimer(struct bq2415x_device *bq, int state) +{ + mutex_lock(&bq2415x_timer_mutex); + + if (bq->autotimer == state) { + mutex_unlock(&bq2415x_timer_mutex); + return; + } + + bq->autotimer = state; + + if (state) { + schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ); + bq2415x_exec_command(bq, BQ2415X_TIMER_RESET); + bq->timer_error = NULL; + } else { + cancel_delayed_work_sync(&bq->work); + } + + mutex_unlock(&bq2415x_timer_mutex); +} + +/* called by bq2415x_timer_work on timer error */ +static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg) +{ + bq->timer_error = msg; + sysfs_notify(&bq->charger.dev->kobj, NULL, "timer"); + dev_err(bq->dev, "%s\n", msg); + if (bq->automode > 0) + bq->automode = 0; + bq2415x_set_mode(bq, BQ2415X_MODE_NONE); + bq2415x_set_autotimer(bq, 0); +} + +/* delayed work function for auto resetting chip timer */ +static void bq2415x_timer_work(struct work_struct *work) +{ + struct bq2415x_device *bq = container_of(work, struct bq2415x_device, + work.work); + int ret, error, boost; + + if (!bq->autotimer) + return; + + ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET); + if (ret < 0) { + bq2415x_timer_error(bq, "Resetting timer failed"); + return; + } + + boost = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_STATUS); + if (boost < 0) { + bq2415x_timer_error(bq, "Unknown error"); + return; + } + + error = bq2415x_exec_command(bq, BQ2415X_FAULT_STATUS); + if (error < 0) { + bq2415x_timer_error(bq, "Unknown error"); + return; + } + + if (boost) { + switch (error) { + /* Non fatal errors, chip is OK */ + case 0: /* No error */ + break; + case 6: /* Timer expired */ + dev_err(bq->dev, "Timer expired\n"); + break; + case 3: /* Battery voltage too low */ + dev_err(bq->dev, "Battery voltage to low\n"); + break; + + /* Fatal errors, disable and reset chip */ + case 1: /* Overvoltage protection (chip fried) */ + bq2415x_timer_error(bq, + "Overvoltage protection (chip fried)"); + return; + case 2: /* Overload */ + bq2415x_timer_error(bq, "Overload"); + return; + case 4: /* Battery overvoltage protection */ + bq2415x_timer_error(bq, + "Battery overvoltage protection"); + return; + case 5: /* Thermal shutdown (too hot) */ + bq2415x_timer_error(bq, + "Thermal shutdown (too hot)"); + return; + case 7: /* N/A */ + bq2415x_timer_error(bq, "Unknown error"); + return; + } + } else { + switch (error) { + /* Non fatal errors, chip is OK */ + case 0: /* No error */ + break; + case 2: /* Sleep mode */ + dev_err(bq->dev, "Sleep mode\n"); + break; + case 3: /* Poor input source */ + dev_err(bq->dev, "Poor input source\n"); + break; + case 6: /* Timer expired */ + dev_err(bq->dev, "Timer expired\n"); + break; + case 7: /* No battery */ + dev_err(bq->dev, "No battery\n"); + break; + + /* Fatal errors, disable and reset chip */ + case 1: /* Overvoltage protection (chip fried) */ + bq2415x_timer_error(bq, + "Overvoltage protection (chip fried)"); + return; + case 4: /* Battery overvoltage protection */ + bq2415x_timer_error(bq, + "Battery overvoltage protection"); + return; + case 5: /* Thermal shutdown (too hot) */ + bq2415x_timer_error(bq, + "Thermal shutdown (too hot)"); + return; + } + } + + schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ); +} + +/**** power supply interface code ****/ + +static enum power_supply_property bq2415x_power_supply_props[] = { + /* TODO: maybe add more power supply properties */ + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_MODEL_NAME, +}; + +static int bq2415x_power_supply_get_property(struct power_supply *psy, + enum power_supply_property psp, union power_supply_propval *val) +{ + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + int ret; + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + ret = bq2415x_exec_command(bq, BQ2415X_CHARGE_STATUS); + if (ret < 0) + return ret; + else if (ret == 0) /* Ready */ + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; + else if (ret == 1) /* Charge in progress */ + val->intval = POWER_SUPPLY_STATUS_CHARGING; + else if (ret == 2) /* Charge done */ + val->intval = POWER_SUPPLY_STATUS_FULL; + else + val->intval = POWER_SUPPLY_STATUS_UNKNOWN; + break; + case POWER_SUPPLY_PROP_MODEL_NAME: + val->strval = bq->model; + break; + default: + return -EINVAL; + } + return 0; +} + +static int bq2415x_power_supply_init(struct bq2415x_device *bq) +{ + int ret; + int chip; + char revstr[8]; + + bq->charger.name = bq->name; + bq->charger.type = POWER_SUPPLY_TYPE_USB; + bq->charger.properties = bq2415x_power_supply_props; + bq->charger.num_properties = ARRAY_SIZE(bq2415x_power_supply_props); + bq->charger.get_property = bq2415x_power_supply_get_property; + + ret = bq2415x_detect_chip(bq); + if (ret < 0) + chip = BQUNKNOWN; + else + chip = ret; + + ret = bq2415x_detect_revision(bq); + if (ret < 0) + strcpy(revstr, "unknown"); + else + sprintf(revstr, "1.%d", ret); + + bq->model = kasprintf(GFP_KERNEL, + "chip %s, revision %s, vender code %.3d", + bq2415x_chip_name[chip], revstr, + bq2415x_get_vender_code(bq)); + if (!bq->model) { + dev_err(bq->dev, "failed to allocate model name\n"); + return -ENOMEM; + } + + ret = power_supply_register(bq->dev, &bq->charger); + if (ret) { + kfree(bq->model); + return ret; + } + + return 0; +} + +static void bq2415x_power_supply_exit(struct bq2415x_device *bq) +{ + bq->autotimer = 0; + if (bq->automode > 0) + bq->automode = 0; + cancel_delayed_work_sync(&bq->work); + power_supply_unregister(&bq->charger); + kfree(bq->model); +} + +/**** additional sysfs entries for power supply interface ****/ + +/* show *_status entries */ +static ssize_t bq2415x_sysfs_show_status(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + enum bq2415x_command command; + int ret; + + if (strcmp(attr->attr.name, "otg_status") == 0) + command = BQ2415X_OTG_STATUS; + else if (strcmp(attr->attr.name, "charge_status") == 0) + command = BQ2415X_CHARGE_STATUS; + else if (strcmp(attr->attr.name, "boost_status") == 0) + command = BQ2415X_BOOST_STATUS; + else if (strcmp(attr->attr.name, "fault_status") == 0) + command = BQ2415X_FAULT_STATUS; + else + return -EINVAL; + + ret = bq2415x_exec_command(bq, command); + if (ret < 0) + return ret; + else + return sprintf(buf, "%d\n", ret); +} + +/* set timer entry: + auto - enable auto mode + off - disable auto mode + (other values) - reset chip timer +*/ +static ssize_t bq2415x_sysfs_set_timer(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + int ret = 0; + + if (strncmp(buf, "auto", 4) == 0) + bq2415x_set_autotimer(bq, 1); + else if (strncmp(buf, "off", 3) == 0) + bq2415x_set_autotimer(bq, 0); + else + ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET); + + if (ret < 0) + return ret; + else + return count; +} + +/* show timer entry (auto or off) */ +static ssize_t bq2415x_sysfs_show_timer(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + + if (bq->timer_error) + return sprintf(buf, "%s\n", bq->timer_error); + + if (bq->autotimer) + return sprintf(buf, "auto\n"); + else + return sprintf(buf, "off\n"); +} + +/* set mode entry: + auto - if automode is supported, enable it and set mode to reported + none - disable charger and boost mode + host - charging mode for host/hub chargers (current limit 500mA) + dedicated - charging mode for dedicated chargers (unlimited current limit) + boost - disable charger and enable boost mode +*/ +static ssize_t bq2415x_sysfs_set_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + enum bq2415x_mode mode; + int ret = 0; + + if (strncmp(buf, "auto", 4) == 0) { + if (bq->automode < 0) + return -ENOSYS; + bq->automode = 1; + mode = bq->reported_mode; + } else if (strncmp(buf, "none", 4) == 0) { + if (bq->automode > 0) + bq->automode = 0; + mode = BQ2415X_MODE_NONE; + } else if (strncmp(buf, "host", 4) == 0) { + if (bq->automode > 0) + bq->automode = 0; + mode = BQ2415X_MODE_HOST_CHARGER; + } else if (strncmp(buf, "dedicated", 9) == 0) { + if (bq->automode > 0) + bq->automode = 0; + mode = BQ2415X_MODE_DEDICATED_CHARGER; + } else if (strncmp(buf, "boost", 5) == 0) { + if (bq->automode > 0) + bq->automode = 0; + mode = BQ2415X_MODE_BOOST; + } else if (strncmp(buf, "reset", 5) == 0) { + bq2415x_reset_chip(bq); + bq2415x_set_defaults(bq); + if (bq->automode <= 0) + return count; + bq->automode = 1; + mode = bq->reported_mode; + } else + return -EINVAL; + + ret = bq2415x_set_mode(bq, mode); + if (ret < 0) + return ret; + else + return count; +} + +/* show mode entry (auto, none, host, dedicated or boost) */ +static ssize_t bq2415x_sysfs_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + ssize_t ret = 0; + + if (bq->automode > 0) + ret += sprintf(buf+ret, "auto ("); + + switch (bq->mode) { + case BQ2415X_MODE_NONE: + ret += sprintf(buf+ret, "none"); + break; + case BQ2415X_MODE_HOST_CHARGER: + ret += sprintf(buf+ret, "host"); + break; + case BQ2415X_MODE_DEDICATED_CHARGER: + ret += sprintf(buf+ret, "dedicated"); + break; + case BQ2415X_MODE_BOOST: + ret += sprintf(buf+ret, "boost"); + break; + } + + if (bq->automode > 0) + ret += sprintf(buf+ret, ")"); + + ret += sprintf(buf+ret, "\n"); + return ret; +} + +/* show reported_mode entry (none, host, dedicated or boost) */ +static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + + if (bq->automode < 0) + return -EINVAL; + + switch (bq->reported_mode) { + case BQ2415X_MODE_NONE: + return sprintf(buf, "none\n"); + case BQ2415X_MODE_HOST_CHARGER: + return sprintf(buf, "host\n"); + case BQ2415X_MODE_DEDICATED_CHARGER: + return sprintf(buf, "dedicated\n"); + case BQ2415X_MODE_BOOST: + return sprintf(buf, "boost\n"); + } + + return -EINVAL; +} + +/* directly set raw value to chip register, format: 'register value' */ +static ssize_t bq2415x_sysfs_set_registers(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + ssize_t ret = 0; + unsigned int reg; + unsigned int val; + + if (sscanf(buf, "%x %x", ®, &val) != 2) + return -EINVAL; + + if (reg > 4 || val > 255) + return -EINVAL; + + ret = bq2415x_i2c_write(bq, reg, val); + if (ret < 0) + return ret; + else + return count; +} + +/* print value of chip register, format: 'register=value' */ +static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq, + u8 reg, char *buf) +{ + int ret = bq2415x_i2c_read(bq, reg); + if (ret < 0) + return sprintf(buf, "%#.2x=error %d\n", reg, ret); + else + return sprintf(buf, "%#.2x=%#.2x\n", reg, ret); +} + +/* show all raw values of chip register, format per line: 'register=value' */ +static ssize_t bq2415x_sysfs_show_registers(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + ssize_t ret = 0; + + ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret); + ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CONTROL, buf+ret); + ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VOLTAGE, buf+ret); + ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VENDER, buf+ret); + ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CURRENT, buf+ret); + return ret; +} + +/* set current and voltage limit entries (in mA or mV) */ +static ssize_t bq2415x_sysfs_set_limit(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + long val; + int ret; + + if (kstrtol(buf, 10, &val) < 0) + return -EINVAL; + + if (strcmp(attr->attr.name, "current_limit") == 0) + ret = bq2415x_set_current_limit(bq, val); + else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0) + ret = bq2415x_set_weak_battery_voltage(bq, val); + else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0) + ret = bq2415x_set_battery_regulation_voltage(bq, val); + else if (strcmp(attr->attr.name, "charge_current") == 0) + ret = bq2415x_set_charge_current(bq, val); + else if (strcmp(attr->attr.name, "termination_current") == 0) + ret = bq2415x_set_termination_current(bq, val); + else + return -EINVAL; + + if (ret < 0) + return ret; + else + return count; +} + +/* show current and voltage limit entries (in mA or mV) */ +static ssize_t bq2415x_sysfs_show_limit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + int ret; + + if (strcmp(attr->attr.name, "current_limit") == 0) + ret = bq2415x_get_current_limit(bq); + else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0) + ret = bq2415x_get_weak_battery_voltage(bq); + else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0) + ret = bq2415x_get_battery_regulation_voltage(bq); + else if (strcmp(attr->attr.name, "charge_current") == 0) + ret = bq2415x_get_charge_current(bq); + else if (strcmp(attr->attr.name, "termination_current") == 0) + ret = bq2415x_get_termination_current(bq); + else + return -EINVAL; + + if (ret < 0) + return ret; + else + return sprintf(buf, "%d\n", ret); +} + +/* set *_enable entries */ +static ssize_t bq2415x_sysfs_set_enable(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + enum bq2415x_command command; + long val; + int ret; + + if (kstrtol(buf, 10, &val) < 0) + return -EINVAL; + + if (strcmp(attr->attr.name, "charge_termination_enable") == 0) + command = val ? BQ2415X_CHARGE_TERMINATION_ENABLE : + BQ2415X_CHARGE_TERMINATION_DISABLE; + else if (strcmp(attr->attr.name, "high_impedance_enable") == 0) + command = val ? BQ2415X_HIGH_IMPEDANCE_ENABLE : + BQ2415X_HIGH_IMPEDANCE_DISABLE; + else if (strcmp(attr->attr.name, "otg_pin_enable") == 0) + command = val ? BQ2415X_OTG_PIN_ENABLE : + BQ2415X_OTG_PIN_DISABLE; + else if (strcmp(attr->attr.name, "stat_pin_enable") == 0) + command = val ? BQ2415X_STAT_PIN_ENABLE : + BQ2415X_STAT_PIN_DISABLE; + else + return -EINVAL; + + ret = bq2415x_exec_command(bq, command); + if (ret < 0) + return ret; + else + return count; +} + +/* show *_enable entries */ +static ssize_t bq2415x_sysfs_show_enable(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct power_supply *psy = dev_get_drvdata(dev); + struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, + charger); + enum bq2415x_command command; + int ret; + + if (strcmp(attr->attr.name, "charge_termination_enable") == 0) + command = BQ2415X_CHARGE_TERMINATION_STATUS; + else if (strcmp(attr->attr.name, "high_impedance_enable") == 0) + command = BQ2415X_HIGH_IMPEDANCE_STATUS; + else if (strcmp(attr->attr.name, "otg_pin_enable") == 0) + command = BQ2415X_OTG_PIN_STATUS; + else if (strcmp(attr->attr.name, "stat_pin_enable") == 0) + command = BQ2415X_STAT_PIN_STATUS; + else + return -EINVAL; + + ret = bq2415x_exec_command(bq, command); + if (ret < 0) + return ret; + else + return sprintf(buf, "%d\n", ret); +} + +static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit); +static DEVICE_ATTR(weak_battery_voltage, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit); +static DEVICE_ATTR(battery_regulation_voltage, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit); +static DEVICE_ATTR(charge_current, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit); +static DEVICE_ATTR(termination_current, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit); + +static DEVICE_ATTR(charge_termination_enable, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable); +static DEVICE_ATTR(high_impedance_enable, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable); +static DEVICE_ATTR(otg_pin_enable, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable); +static DEVICE_ATTR(stat_pin_enable, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable); + +static DEVICE_ATTR(reported_mode, S_IRUGO, + bq2415x_sysfs_show_reported_mode, NULL); +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_mode, bq2415x_sysfs_set_mode); +static DEVICE_ATTR(timer, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_timer, bq2415x_sysfs_set_timer); + +static DEVICE_ATTR(registers, S_IWUSR | S_IRUGO, + bq2415x_sysfs_show_registers, bq2415x_sysfs_set_registers); + +static DEVICE_ATTR(otg_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); +static DEVICE_ATTR(charge_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); +static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); +static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); + +static struct attribute *bq2415x_sysfs_attributes[] = { + &dev_attr_current_limit.attr, + &dev_attr_weak_battery_voltage.attr, + &dev_attr_battery_regulation_voltage.attr, + &dev_attr_charge_current.attr, + &dev_attr_termination_current.attr, + + &dev_attr_charge_termination_enable.attr, + &dev_attr_high_impedance_enable.attr, + &dev_attr_otg_pin_enable.attr, + &dev_attr_stat_pin_enable.attr, + + &dev_attr_reported_mode.attr, + &dev_attr_mode.attr, + &dev_attr_timer.attr, + + &dev_attr_registers.attr, + + &dev_attr_otg_status.attr, + &dev_attr_charge_status.attr, + &dev_attr_boost_status.attr, + &dev_attr_fault_status.attr, + NULL, +}; + +static const struct attribute_group bq2415x_sysfs_attr_group = { + .attrs = bq2415x_sysfs_attributes, +}; + +static int bq2415x_sysfs_init(struct bq2415x_device *bq) +{ + return sysfs_create_group(&bq->charger.dev->kobj, + &bq2415x_sysfs_attr_group); +} + +static void bq2415x_sysfs_exit(struct bq2415x_device *bq) +{ + sysfs_remove_group(&bq->charger.dev->kobj, &bq2415x_sysfs_attr_group); +} + +/* main bq2415x probe function */ +static int bq2415x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + int num; + char *name; + struct bq2415x_device *bq; + + if (!client->dev.platform_data) { + dev_err(&client->dev, "platform data not set\n"); + return -ENODEV; + } + + /* Get new ID for the new device */ + ret = idr_pre_get(&bq2415x_id, GFP_KERNEL); + if (ret == 0) + return -ENOMEM; + + mutex_lock(&bq2415x_id_mutex); + ret = idr_get_new(&bq2415x_id, client, &num); + mutex_unlock(&bq2415x_id_mutex); + + if (ret < 0) + return ret; + + name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num); + if (!name) { + dev_err(&client->dev, "failed to allocate device name\n"); + ret = -ENOMEM; + goto error_1; + } + + bq = kzalloc(sizeof(*bq), GFP_KERNEL); + if (!bq) { + dev_err(&client->dev, "failed to allocate device data\n"); + ret = -ENOMEM; + goto error_2; + } + + i2c_set_clientdata(client, bq); + + bq->id = num; + bq->dev = &client->dev; + bq->chip = id->driver_data; + bq->name = name; + bq->mode = BQ2415X_MODE_NONE; + bq->reported_mode = BQ2415X_MODE_NONE; + bq->autotimer = 0; + bq->automode = 0; + + memcpy(&bq->init_data, client->dev.platform_data, + sizeof(bq->init_data)); + + bq2415x_reset_chip(bq); + + ret = bq2415x_power_supply_init(bq); + if (ret) { + dev_err(bq->dev, "failed to register power supply: %d\n", ret); + goto error_3; + } + + ret = bq2415x_sysfs_init(bq); + if (ret) { + dev_err(bq->dev, "failed to create sysfs entries: %d\n", ret); + goto error_4; + } + + ret = bq2415x_set_defaults(bq); + if (ret) { + dev_err(bq->dev, "failed to set default values: %d\n", ret); + goto error_5; + } + + if (bq->init_data.set_mode_hook) { + if (bq->init_data.set_mode_hook( + bq2415x_hook_function, bq)) { + bq->automode = 1; + bq2415x_set_mode(bq, bq->reported_mode); + dev_info(bq->dev, "automode enabled\n"); + } else { + bq->automode = -1; + dev_info(bq->dev, "automode failed\n"); + } + } else { + bq->automode = -1; + dev_info(bq->dev, "automode not supported\n"); + } + + INIT_DELAYED_WORK(&bq->work, bq2415x_timer_work); + bq2415x_set_autotimer(bq, 1); + + dev_info(bq->dev, "driver registered\n"); + return 0; + +error_5: + bq2415x_sysfs_exit(bq); +error_4: + bq2415x_power_supply_exit(bq); +error_3: + kfree(bq); +error_2: + kfree(name); +error_1: + mutex_lock(&bq2415x_id_mutex); + idr_remove(&bq2415x_id, num); + mutex_unlock(&bq2415x_id_mutex); + + return ret; +} + +/* main bq2415x remove function */ + +static int bq2415x_remove(struct i2c_client *client) +{ + struct bq2415x_device *bq = i2c_get_clientdata(client); + + if (bq->init_data.set_mode_hook) + bq->init_data.set_mode_hook(NULL, NULL); + + bq2415x_sysfs_exit(bq); + bq2415x_power_supply_exit(bq); + + bq2415x_reset_chip(bq); + + mutex_lock(&bq2415x_id_mutex); + idr_remove(&bq2415x_id, bq->id); + mutex_unlock(&bq2415x_id_mutex); + + dev_info(bq->dev, "driver unregistered\n"); + + kfree(bq->name); + kfree(bq); + + return 0; +} + +static const struct i2c_device_id bq2415x_i2c_id_table[] = { + { "bq2415x", BQUNKNOWN }, + { "bq24150", BQ24150 }, + { "bq24150a", BQ24150A }, + { "bq24151", BQ24151 }, + { "bq24151a", BQ24151A }, + { "bq24152", BQ24152 }, + { "bq24153", BQ24153 }, + { "bq24153a", BQ24153A }, + { "bq24155", BQ24155 }, + { "bq24156", BQ24156 }, + { "bq24156a", BQ24156A }, + { "bq24158", BQ24158 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, bq2415x_i2c_id_table); + +static struct i2c_driver bq2415x_driver = { + .driver = { + .name = "bq2415x-charger", + }, + .probe = bq2415x_probe, + .remove = bq2415x_remove, + .id_table = bq2415x_i2c_id_table, +}; + +static int __init bq2415x_init(void) +{ + return i2c_add_driver(&bq2415x_driver); +} +module_init(bq2415x_init); + +static void __exit bq2415x_exit(void) +{ + i2c_del_driver(&bq2415x_driver); +} +module_exit(bq2415x_exit); + +MODULE_AUTHOR("Pali Rohár "); +MODULE_DESCRIPTION("bq2415x charger driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h new file mode 100644 index 000000000000..fb6bf4d8aea8 --- /dev/null +++ b/include/linux/power/bq2415x_charger.h @@ -0,0 +1,90 @@ +/* + bq2415x_charger.h - bq2415x charger driver + Copyright (C) 2011-2012 Pali Rohár + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#ifndef BQ2415X_CHARGER_H +#define BQ2415X_CHARGER_H + +/* + This is platform data for bq2415x chip. It contains default board voltages + and currents which can be also later configured via sysfs. If value is -1 + then default chip value (specified in datasheet) will be used. + + Value resistor_sense is needed for for configuring charge and termination + current. It it is less or equal to zero, configuring charge and termination + current will not be possible. + + Function set_mode_hook is needed for automode (setting correct current limit + when charger is connected/disconnected or setting boost mode). When is NULL, + automode function is disabled. When is not NULL, it must have this prototype: + + int (*set_mode_hook)( + void (*hook)(enum bq2415x_mode mode, void *data), + void *data) + + hook is hook function (see below) and data is pointer to driver private data + + bq2415x driver will call it as: + + platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device); + + Board/platform function set_mode_hook return non zero value when hook + function was successful registered. Platform code should call that hook + function (which get from pointer, with data) every time when charger was + connected/disconnected or require to enable boost mode. bq2415x driver then + will set correct current limit, enable/disable charger or boost mode. + + Hook function has this prototype: + + void hook(enum bq2415x_mode mode, void *data); + + mode is bq2415x mode (charger or boost) + data is pointer to driver private data (which get from set_charger_type_hook) + + When bq driver is being unloaded, it call function: + + platform_data->set_mode_hook(NULL, NULL); + + (hook function and driver private data are NULL) + + After that board/platform code must not call driver hook function! It is + possible that pointer to hook function will not be valid and calling will + cause undefined result. + +*/ + +/* Supported modes with maximal current limit */ +enum bq2415x_mode { + BQ2415X_MODE_NONE, /* unknown or no charger (100mA) */ + BQ2415X_MODE_HOST_CHARGER, /* usb host/hub charger (500mA) */ + BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */ + BQ2415X_MODE_BOOST, /* boost mode (charging disabled) */ +}; + +struct bq2415x_platform_data { + int current_limit; /* mA */ + int weak_battery_voltage; /* mV */ + int battery_regulation_voltage; /* mV */ + int charge_current; /* mA */ + int termination_current; /* mA */ + int resistor_sense; /* m ohm */ + int (*set_mode_hook)(void (*hook)(enum bq2415x_mode mode, void *data), + void *data); +}; + +#endif -- cgit v1.2.3 From 6c47a3e00c6e4f3cdac7566c1480de34d9e32e07 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 18 Nov 2012 15:35:32 -0800 Subject: bq2415x_charger: Fix style issues I hate doing these style fixups myself, but I also hate inconsitent code. Normally I just ask to resubmit the patch with the issues fixed, but N900 is special: I have a selfish interest in it. :) Signed-off-by: Anton Vorontsov --- drivers/power/bq2415x_charger.c | 268 +++++++++++++++++++--------------- include/linux/power/bq2415x_charger.h | 131 +++++++++-------- 2 files changed, 215 insertions(+), 184 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c index 017b3c27f769..ee842b37f462 100644 --- a/drivers/power/bq2415x_charger.c +++ b/drivers/power/bq2415x_charger.c @@ -1,31 +1,32 @@ /* - bq2415x_charger.c - bq2415x charger driver - Copyright (C) 2011-2012 Pali Rohár - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ + * bq2415x charger driver + * + * Copyright (C) 2011-2012 Pali Rohár + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ /* - Datasheets: - http://www.ti.com/product/bq24150 - http://www.ti.com/product/bq24150a - http://www.ti.com/product/bq24152 - http://www.ti.com/product/bq24153 - http://www.ti.com/product/bq24153a - http://www.ti.com/product/bq24155 -*/ + * Datasheets: + * http://www.ti.com/product/bq24150 + * http://www.ti.com/product/bq24150a + * http://www.ti.com/product/bq24152 + * http://www.ti.com/product/bq24153 + * http://www.ti.com/product/bq24153a + * http://www.ti.com/product/bq24155 + */ #include #include @@ -222,7 +223,7 @@ static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg) /* read value from register, apply mask and right shift it */ static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg, - u8 mask, u8 shift) + u8 mask, u8 shift) { int ret; @@ -232,8 +233,7 @@ static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg, ret = bq2415x_i2c_read(bq, reg); if (ret < 0) return ret; - else - return (ret & mask) >> shift; + return (ret & mask) >> shift; } /* read value from register and return one specified bit */ @@ -241,8 +241,7 @@ static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit) { if (bit > 8) return -EINVAL; - else - return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit); + return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit); } /**** i2c write functions ****/ @@ -278,7 +277,7 @@ static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val) /* read value from register, change it with mask left shifted and write back */ static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val, - u8 mask, u8 shift) + u8 mask, u8 shift) { int ret; @@ -297,12 +296,11 @@ static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val, /* change only one bit in register */ static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg, - bool val, u8 bit) + bool val, u8 bit) { if (bit > 8) return -EINVAL; - else - return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit); + return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit); } /**** global functions ****/ @@ -312,6 +310,7 @@ static int bq2415x_exec_command(struct bq2415x_device *bq, enum bq2415x_command command) { int ret; + switch (command) { case BQ2415X_TIMER_RESET: return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, @@ -407,10 +406,8 @@ static int bq2415x_exec_command(struct bq2415x_device *bq, case BQ2415X_REVISION: return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER, BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION); - - default: - return -EINVAL; } + return -EINVAL; } /* detect chip type */ @@ -470,6 +467,7 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq) { int ret = bq2415x_exec_command(bq, BQ2415X_REVISION); int chip = bq2415x_detect_chip(bq); + if (ret < 0 || chip < 0) return -1; @@ -483,7 +481,6 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq) return ret; else return -1; - case BQ24153: case BQ24153A: case BQ24156: @@ -495,13 +492,11 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq) return 1; else return -1; - case BQ24155: if (ret == 3) return 3; else return -1; - case BQUNKNOWN: return -1; } @@ -512,13 +507,16 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq) /* return chip vender code */ static int bq2415x_get_vender_code(struct bq2415x_device *bq) { - int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE); + int ret; + + ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE); if (ret < 0) return 0; - else /* convert to binary */ - return (ret & 0x1) + - ((ret >> 1) & 0x1) * 10 + - ((ret >> 2) & 0x1) * 100; + + /* convert to binary */ + return (ret & 0x1) + + ((ret >> 1) & 0x1) * 10 + + ((ret >> 2) & 0x1) * 100; } /* reset all chip registers to default state */ @@ -537,6 +535,7 @@ static void bq2415x_reset_chip(struct bq2415x_device *bq) static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA) { int val; + if (mA <= 100) val = 0; else if (mA <= 500) @@ -545,6 +544,7 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA) val = 2; else val = 3; + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val, BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT); } @@ -552,7 +552,9 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA) /* get current limit in mA */ static int bq2415x_get_current_limit(struct bq2415x_device *bq) { - int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, + int ret; + + ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT); if (ret < 0) return ret; @@ -564,15 +566,15 @@ static int bq2415x_get_current_limit(struct bq2415x_device *bq) return 800; else if (ret == 3) return 1800; - else - return -EINVAL; + return -EINVAL; } /* set weak battery voltage in mV */ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV) { - /* round to 100mV */ int val; + + /* round to 100mV */ if (mV <= 3400 + 50) val = 0; else if (mV <= 3500 + 50) @@ -581,6 +583,7 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV) val = 2; else val = 3; + return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val, BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV); } @@ -588,17 +591,18 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV) /* get weak battery voltage in mV */ static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq) { - int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, + int ret; + + ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL, BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV); if (ret < 0) return ret; - else - return 100 * (34 + ret); + return 100 * (34 + ret); } /* set battery regulation voltage in mV */ static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq, - int mV) + int mV) { int val = (mV/10 - 350) / 2; @@ -616,21 +620,21 @@ static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq) { int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE, BQ2415X_MASK_VO, BQ2415X_SHIFT_VO); + if (ret < 0) return ret; - else - return 10 * (350 + 2*ret); + return 10 * (350 + 2*ret); } /* set charge current in mA (platform data must provide resistor sense) */ static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA) { int val; + if (bq->init_data.resistor_sense <= 0) return -ENOSYS; val = (mA * bq->init_data.resistor_sense - 37400) / 6800; - if (val < 0) val = 0; else if (val > 7) @@ -645,6 +649,7 @@ static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA) static int bq2415x_get_charge_current(struct bq2415x_device *bq) { int ret; + if (bq->init_data.resistor_sense <= 0) return -ENOSYS; @@ -652,19 +657,18 @@ static int bq2415x_get_charge_current(struct bq2415x_device *bq) BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG); if (ret < 0) return ret; - else - return (37400 + 6800*ret) / bq->init_data.resistor_sense; + return (37400 + 6800*ret) / bq->init_data.resistor_sense; } /* set termination current in mA (platform data must provide resistor sense) */ static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA) { int val; + if (bq->init_data.resistor_sense <= 0) return -ENOSYS; val = (mA * bq->init_data.resistor_sense - 3400) / 3400; - if (val < 0) val = 0; else if (val > 7) @@ -679,6 +683,7 @@ static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA) static int bq2415x_get_termination_current(struct bq2415x_device *bq) { int ret; + if (bq->init_data.resistor_sense <= 0) return -ENOSYS; @@ -686,8 +691,7 @@ static int bq2415x_get_termination_current(struct bq2415x_device *bq) BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM); if (ret < 0) return ret; - else - return (3400 + 3400*ret) / bq->init_data.resistor_sense; + return (3400 + 3400*ret) / bq->init_data.resistor_sense; } /* set default value of property */ @@ -706,14 +710,17 @@ static int bq2415x_set_defaults(struct bq2415x_device *bq) bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE); bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE); bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE); + bq2415x_set_default_value(bq, current_limit); bq2415x_set_default_value(bq, weak_battery_voltage); bq2415x_set_default_value(bq, battery_regulation_voltage); + if (bq->init_data.resistor_sense > 0) { bq2415x_set_default_value(bq, charge_current); bq2415x_set_default_value(bq, termination_current); bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE); } + bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE); return 0; } @@ -844,8 +851,10 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg) static void bq2415x_timer_work(struct work_struct *work) { struct bq2415x_device *bq = container_of(work, struct bq2415x_device, - work.work); - int ret, error, boost; + work.work); + int ret; + int error; + int boost; if (!bq->autotimer) return; @@ -946,10 +955,11 @@ static enum power_supply_property bq2415x_power_supply_props[] = { }; static int bq2415x_power_supply_get_property(struct power_supply *psy, - enum power_supply_property psp, union power_supply_propval *val) + enum power_supply_property psp, + union power_supply_propval *val) { struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); int ret; switch (psp) { @@ -1031,7 +1041,8 @@ static void bq2415x_power_supply_exit(struct bq2415x_device *bq) /* show *_status entries */ static ssize_t bq2415x_sysfs_show_status(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, @@ -1053,17 +1064,19 @@ static ssize_t bq2415x_sysfs_show_status(struct device *dev, ret = bq2415x_exec_command(bq, command); if (ret < 0) return ret; - else - return sprintf(buf, "%d\n", ret); + return sprintf(buf, "%d\n", ret); } -/* set timer entry: - auto - enable auto mode - off - disable auto mode - (other values) - reset chip timer -*/ +/* + * set timer entry: + * auto - enable auto mode + * off - disable auto mode + * (other values) - reset chip timer + */ static ssize_t bq2415x_sysfs_set_timer(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, + size_t count) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, @@ -1079,40 +1092,42 @@ static ssize_t bq2415x_sysfs_set_timer(struct device *dev, if (ret < 0) return ret; - else - return count; + return count; } /* show timer entry (auto or off) */ static ssize_t bq2415x_sysfs_show_timer(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); if (bq->timer_error) return sprintf(buf, "%s\n", bq->timer_error); if (bq->autotimer) return sprintf(buf, "auto\n"); - else - return sprintf(buf, "off\n"); + return sprintf(buf, "off\n"); } -/* set mode entry: - auto - if automode is supported, enable it and set mode to reported - none - disable charger and boost mode - host - charging mode for host/hub chargers (current limit 500mA) - dedicated - charging mode for dedicated chargers (unlimited current limit) - boost - disable charger and enable boost mode -*/ +/* + * set mode entry: + * auto - if automode is supported, enable it and set mode to reported + * none - disable charger and boost mode + * host - charging mode for host/hub chargers (current limit 500mA) + * dedicated - charging mode for dedicated chargers (unlimited current limit) + * boost - disable charger and enable boost mode + */ static ssize_t bq2415x_sysfs_set_mode(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, + size_t count) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); enum bq2415x_mode mode; int ret = 0; @@ -1144,19 +1159,20 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev, return count; bq->automode = 1; mode = bq->reported_mode; - } else + } else { return -EINVAL; + } ret = bq2415x_set_mode(bq, mode); if (ret < 0) return ret; - else - return count; + return count; } /* show mode entry (auto, none, host, dedicated or boost) */ static ssize_t bq2415x_sysfs_show_mode(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, @@ -1190,11 +1206,12 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev, /* show reported_mode entry (none, host, dedicated or boost) */ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); if (bq->automode < 0) return -EINVAL; @@ -1215,11 +1232,13 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev, /* directly set raw value to chip register, format: 'register value' */ static ssize_t bq2415x_sysfs_set_registers(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, + size_t count) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); ssize_t ret = 0; unsigned int reg; unsigned int val; @@ -1233,28 +1252,29 @@ static ssize_t bq2415x_sysfs_set_registers(struct device *dev, ret = bq2415x_i2c_write(bq, reg, val); if (ret < 0) return ret; - else - return count; + return count; } /* print value of chip register, format: 'register=value' */ static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq, - u8 reg, char *buf) + u8 reg, + char *buf) { int ret = bq2415x_i2c_read(bq, reg); + if (ret < 0) return sprintf(buf, "%#.2x=error %d\n", reg, ret); - else - return sprintf(buf, "%#.2x=%#.2x\n", reg, ret); + return sprintf(buf, "%#.2x=%#.2x\n", reg, ret); } /* show all raw values of chip register, format per line: 'register=value' */ static ssize_t bq2415x_sysfs_show_registers(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); ssize_t ret = 0; ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret); @@ -1267,11 +1287,13 @@ static ssize_t bq2415x_sysfs_show_registers(struct device *dev, /* set current and voltage limit entries (in mA or mV) */ static ssize_t bq2415x_sysfs_set_limit(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, + size_t count) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); long val; int ret; @@ -1293,17 +1315,17 @@ static ssize_t bq2415x_sysfs_set_limit(struct device *dev, if (ret < 0) return ret; - else - return count; + return count; } /* show current and voltage limit entries (in mA or mV) */ static ssize_t bq2415x_sysfs_show_limit(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); int ret; if (strcmp(attr->attr.name, "current_limit") == 0) @@ -1321,17 +1343,18 @@ static ssize_t bq2415x_sysfs_show_limit(struct device *dev, if (ret < 0) return ret; - else - return sprintf(buf, "%d\n", ret); + return sprintf(buf, "%d\n", ret); } /* set *_enable entries */ static ssize_t bq2415x_sysfs_set_enable(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) + struct device_attribute *attr, + const char *buf, + size_t count) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); enum bq2415x_command command; long val; int ret; @@ -1357,17 +1380,17 @@ static ssize_t bq2415x_sysfs_set_enable(struct device *dev, ret = bq2415x_exec_command(bq, command); if (ret < 0) return ret; - else - return count; + return count; } /* show *_enable entries */ static ssize_t bq2415x_sysfs_show_enable(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct power_supply *psy = dev_get_drvdata(dev); struct bq2415x_device *bq = container_of(psy, struct bq2415x_device, - charger); + charger); enum bq2415x_command command; int ret; @@ -1385,8 +1408,7 @@ static ssize_t bq2415x_sysfs_show_enable(struct device *dev, ret = bq2415x_exec_command(bq, command); if (ret < 0) return ret; - else - return sprintf(buf, "%d\n", ret); + return sprintf(buf, "%d\n", ret); } static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO, @@ -1425,6 +1447,10 @@ static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL); static struct attribute *bq2415x_sysfs_attributes[] = { + /* + * TODO: some (appropriate) of these attrs should be switched to + * use power supply class props. + */ &dev_attr_current_limit.attr, &dev_attr_weak_battery_voltage.attr, &dev_attr_battery_regulation_voltage.attr, @@ -1466,7 +1492,7 @@ static void bq2415x_sysfs_exit(struct bq2415x_device *bq) /* main bq2415x probe function */ static int bq2415x_probe(struct i2c_client *client, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { int ret; int num; diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index fb6bf4d8aea8..97a1665eaeaf 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -1,72 +1,77 @@ /* - bq2415x_charger.h - bq2415x charger driver - Copyright (C) 2011-2012 Pali Rohár - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ + * bq2415x charger driver + * + * Copyright (C) 2011-2012 Pali Rohár + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ #ifndef BQ2415X_CHARGER_H #define BQ2415X_CHARGER_H /* - This is platform data for bq2415x chip. It contains default board voltages - and currents which can be also later configured via sysfs. If value is -1 - then default chip value (specified in datasheet) will be used. - - Value resistor_sense is needed for for configuring charge and termination - current. It it is less or equal to zero, configuring charge and termination - current will not be possible. - - Function set_mode_hook is needed for automode (setting correct current limit - when charger is connected/disconnected or setting boost mode). When is NULL, - automode function is disabled. When is not NULL, it must have this prototype: - - int (*set_mode_hook)( - void (*hook)(enum bq2415x_mode mode, void *data), - void *data) - - hook is hook function (see below) and data is pointer to driver private data - - bq2415x driver will call it as: - - platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device); - - Board/platform function set_mode_hook return non zero value when hook - function was successful registered. Platform code should call that hook - function (which get from pointer, with data) every time when charger was - connected/disconnected or require to enable boost mode. bq2415x driver then - will set correct current limit, enable/disable charger or boost mode. - - Hook function has this prototype: - - void hook(enum bq2415x_mode mode, void *data); - - mode is bq2415x mode (charger or boost) - data is pointer to driver private data (which get from set_charger_type_hook) - - When bq driver is being unloaded, it call function: - - platform_data->set_mode_hook(NULL, NULL); - - (hook function and driver private data are NULL) - - After that board/platform code must not call driver hook function! It is - possible that pointer to hook function will not be valid and calling will - cause undefined result. - -*/ + * This is platform data for bq2415x chip. It contains default board + * voltages and currents which can be also later configured via sysfs. If + * value is -1 then default chip value (specified in datasheet) will be + * used. + * + * Value resistor_sense is needed for for configuring charge and + * termination current. It it is less or equal to zero, configuring charge + * and termination current will not be possible. + * + * Function set_mode_hook is needed for automode (setting correct current + * limit when charger is connected/disconnected or setting boost mode). + * When is NULL, automode function is disabled. When is not NULL, it must + * have this prototype: + * + * int (*set_mode_hook)( + * void (*hook)(enum bq2415x_mode mode, void *data), + * void *data) + * + * hook is hook function (see below) and data is pointer to driver private + * data + * + * bq2415x driver will call it as: + * + * platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device); + * + * Board/platform function set_mode_hook return non zero value when hook + * function was successful registered. Platform code should call that hook + * function (which get from pointer, with data) every time when charger + * was connected/disconnected or require to enable boost mode. bq2415x + * driver then will set correct current limit, enable/disable charger or + * boost mode. + * + * Hook function has this prototype: + * + * void hook(enum bq2415x_mode mode, void *data); + * + * mode is bq2415x mode (charger or boost) + * data is pointer to driver private data (which get from + * set_charger_type_hook) + * + * When bq driver is being unloaded, it call function: + * + * platform_data->set_mode_hook(NULL, NULL); + * + * (hook function and driver private data are NULL) + * + * After that board/platform code must not call driver hook function! It + * is possible that pointer to hook function will not be valid and calling + * will cause undefined result. + */ /* Supported modes with maximal current limit */ enum bq2415x_mode { -- cgit v1.2.3 From e0f1abeba5c2d8a2183566717d99294fd1a29c2e Mon Sep 17 00:00:00 2001 From: "Rajanikanth H.V" Date: Sun, 18 Nov 2012 18:45:41 -0800 Subject: ab8500: Add devicetree support for fuelgauge - This patch adds device tree support for fuelgauge driver - optimize bm devices platform_data usage and of_probe(...) Note: of_probe() routine for battery managed devices is made common across all bm drivers. - test status: - interrupt numbers assigned differs between legacy and FDT mode. Signed-off-by: Rajanikanth H.V Signed-off-by: Anton Vorontsov --- Documentation/devicetree/bindings/mfd/ab8500.txt | 7 +- .../devicetree/bindings/power_supply/ab8500/fg.txt | 58 +++ arch/arm/boot/dts/dbx5x0.dtsi | 12 +- drivers/mfd/ab8500-core.c | 5 + drivers/power/Makefile | 2 +- drivers/power/ab8500_bmdata.c | 521 +++++++++++++++++++++ drivers/power/ab8500_btemp.c | 16 +- drivers/power/ab8500_charger.c | 16 +- drivers/power/ab8500_fg.c | 82 ++-- drivers/power/abx500_chargalg.c | 8 +- include/linux/mfd/abx500.h | 36 +- 11 files changed, 667 insertions(+), 96 deletions(-) create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/fg.txt create mode 100644 drivers/power/ab8500_bmdata.c (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt index ce83c8d3c00e..6ca8d817ef92 100644 --- a/Documentation/devicetree/bindings/mfd/ab8500.txt +++ b/Documentation/devicetree/bindings/mfd/ab8500.txt @@ -24,7 +24,12 @@ ab8500-bm : : : Battery Manager ab8500-btemp : : : Battery Temperature ab8500-charger : : : Battery Charger ab8500-codec : : : Audio Codec -ab8500-fg : : : Fuel Gauge +ab8500-fg : : vddadc : Fuel Gauge + : NCONV_ACCU : : Accumulate N Sample Conversion + : BATT_OVV : : Battery Over Voltage + : LOW_BAT_F : : LOW threshold battery voltage + : CC_INT_CALIB : : Coulomb Counter Internal Calibration + : CCEOC : : Coulomb Counter End of Conversion ab8500-gpadc : HW_CONV_END : vddadc : Analogue to Digital Converter SW_CONV_END : : ab8500-gpio : : : GPIO Controller diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt new file mode 100644 index 000000000000..ccafcb9112fb --- /dev/null +++ b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt @@ -0,0 +1,58 @@ +=== AB8500 Fuel Gauge Driver === + +AB8500 is a mixed signal multimedia and power management +device comprising: power and energy-management-module, +wall-charger, usb-charger, audio codec, general purpose adc, +tvout, clock management and sim card interface. + +Fuelgauge support is part of energy-management-modules, other +components of this module are: +main-charger, usb-combo-charger and battery-temperature-monitoring. + +The properties below describes the node for fuelgauge driver. + +Required Properties: +- compatible = This shall be: "stericsson,ab8500-fg" +- battery = Shall be battery specific information + Example: + ab8500_fg { + compatible = "stericsson,ab8500-fg"; + battery = <&ab8500_battery>; + }; + +dependent node: + ab8500_battery: ab8500_battery { + }; + This node will provide information on 'thermistor interface' and + 'battery technology type' used. + +Properties of this node are: +thermistor-on-batctrl: + A boolean value indicating thermistor interface to battery + + Note: + 'btemp' and 'batctrl' are the pins interfaced for battery temperature + measurement, 'btemp' signal is used when NTC(negative temperature + coefficient) resister is interfaced external to battery whereas + 'batctrl' pin is used when NTC resister is internal to battery. + + Example: + ab8500_battery: ab8500_battery { + thermistor-on-batctrl; + }; + indicates: NTC resister is internal to battery, 'batctrl' is used + for thermal measurement. + + The absence of property 'thermal-on-batctrl' indicates + NTC resister is external to battery and 'btemp' signal is used + for thermal measurement. + +battery-type: + This shall be the battery manufacturing technology type, + allowed types are: + "UNKNOWN" "NiMH" "LION" "LIPO" "LiFe" "NiCd" "LiMn" + Example: + ab8500_battery: ab8500_battery { + stericsson,battery-type = "LIPO"; + } + diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi index 4b0e0ca08f40..0c81986904c5 100644 --- a/arch/arm/boot/dts/dbx5x0.dtsi +++ b/arch/arm/boot/dts/dbx5x0.dtsi @@ -352,7 +352,17 @@ vddadc-supply = <&ab8500_ldo_tvout_reg>; }; - ab8500-usb { + ab8500_battery: ab8500_battery { + stericsson,battery-type = "LIPO"; + thermistor-on-batctrl; + }; + + ab8500_fg { + compatible = "stericsson,ab8500-fg"; + battery = <&ab8500_battery>; + }; + + ab8500_usb { compatible = "stericsson,ab8500-usb"; interrupts = < 90 0x4 96 0x4 diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 1667c77b5cde..7c3017ba73e4 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -1051,8 +1051,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = { }, { .name = "ab8500-fg", + .of_compatible = "stericsson,ab8500-fg", .num_resources = ARRAY_SIZE(ab8500_fg_resources), .resources = ab8500_fg_resources, +#ifndef CONFIG_OF + .platform_data = &ab8500_bm_data, + .pdata_size = sizeof(ab8500_bm_data), +#endif }, { .name = "ab8500-chargalg", diff --git a/drivers/power/Makefile b/drivers/power/Makefile index 74dfd9570154..696e3a960b3e 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -38,7 +38,7 @@ obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o obj-$(CONFIG_BATTERY_JZ4740) += jz4740-battery.o obj-$(CONFIG_BATTERY_INTEL_MID) += intel_mid_battery.o obj-$(CONFIG_BATTERY_RX51) += rx51_battery.o -obj-$(CONFIG_AB8500_BM) += ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o +obj-$(CONFIG_AB8500_BM) += ab8500_bmdata.o ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o obj-$(CONFIG_CHARGER_ISP1704) += isp1704_charger.o obj-$(CONFIG_CHARGER_MAX8903) += max8903_charger.o obj-$(CONFIG_CHARGER_TWL4030) += twl4030_charger.o diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c new file mode 100644 index 000000000000..e7639b6659f7 --- /dev/null +++ b/drivers/power/ab8500_bmdata.c @@ -0,0 +1,521 @@ +#include +#include +#include +#include +#include +#include + +/* + * These are the defined batteries that uses a NTC and ID resistor placed + * inside of the battery pack. + * Note that the res_to_temp table must be strictly sorted by falling resistance + * values to work. + */ +static struct abx500_res_to_temp temp_tbl_A_thermistor[] = { + {-5, 53407}, + { 0, 48594}, + { 5, 43804}, + {10, 39188}, + {15, 34870}, + {20, 30933}, + {25, 27422}, + {30, 24347}, + {35, 21694}, + {40, 19431}, + {45, 17517}, + {50, 15908}, + {55, 14561}, + {60, 13437}, + {65, 12500}, +}; + +static struct abx500_res_to_temp temp_tbl_B_thermistor[] = { + {-5, 165418}, + { 0, 159024}, + { 5, 151921}, + {10, 144300}, + {15, 136424}, + {20, 128565}, + {25, 120978}, + {30, 113875}, + {35, 107397}, + {40, 101629}, + {45, 96592}, + {50, 92253}, + {55, 88569}, + {60, 85461}, + {65, 82869}, +}; + +static struct abx500_v_to_cap cap_tbl_A_thermistor[] = { + {4171, 100}, + {4114, 95}, + {4009, 83}, + {3947, 74}, + {3907, 67}, + {3863, 59}, + {3830, 56}, + {3813, 53}, + {3791, 46}, + {3771, 33}, + {3754, 25}, + {3735, 20}, + {3717, 17}, + {3681, 13}, + {3664, 8}, + {3651, 6}, + {3635, 5}, + {3560, 3}, + {3408, 1}, + {3247, 0}, +}; + +static struct abx500_v_to_cap cap_tbl_B_thermistor[] = { + {4161, 100}, + {4124, 98}, + {4044, 90}, + {4003, 85}, + {3966, 80}, + {3933, 75}, + {3888, 67}, + {3849, 60}, + {3813, 55}, + {3787, 47}, + {3772, 30}, + {3751, 25}, + {3718, 20}, + {3681, 16}, + {3660, 14}, + {3589, 10}, + {3546, 7}, + {3495, 4}, + {3404, 2}, + {3250, 0}, +}; + +static struct abx500_v_to_cap cap_tbl[] = { + {4186, 100}, + {4163, 99}, + {4114, 95}, + {4068, 90}, + {3990, 80}, + {3926, 70}, + {3898, 65}, + {3866, 60}, + {3833, 55}, + {3812, 50}, + {3787, 40}, + {3768, 30}, + {3747, 25}, + {3730, 20}, + {3705, 15}, + {3699, 14}, + {3684, 12}, + {3672, 9}, + {3657, 7}, + {3638, 6}, + {3556, 4}, + {3424, 2}, + {3317, 1}, + {3094, 0}, +}; + +/* + * Note that the res_to_temp table must be strictly sorted by falling + * resistance values to work. + */ +static struct abx500_res_to_temp temp_tbl[] = { + {-5, 214834}, + { 0, 162943}, + { 5, 124820}, + {10, 96520}, + {15, 75306}, + {20, 59254}, + {25, 47000}, + {30, 37566}, + {35, 30245}, + {40, 24520}, + {45, 20010}, + {50, 16432}, + {55, 13576}, + {60, 11280}, + {65, 9425}, +}; + +/* + * Note that the batres_vs_temp table must be strictly sorted by falling + * temperature values to work. + */ +static struct batres_vs_temp temp_to_batres_tbl_thermistor[] = { + { 40, 120}, + { 30, 135}, + { 20, 165}, + { 10, 230}, + { 00, 325}, + {-10, 445}, + {-20, 595}, +}; + +/* + * Note that the batres_vs_temp table must be strictly sorted by falling + * temperature values to work. + */ +static struct batres_vs_temp temp_to_batres_tbl_ext_thermistor[] = { + { 60, 300}, + { 30, 300}, + { 20, 300}, + { 10, 300}, + { 00, 300}, + {-10, 300}, + {-20, 300}, +}; + +/* battery resistance table for LI ION 9100 battery */ +static struct batres_vs_temp temp_to_batres_tbl_9100[] = { + { 60, 180}, + { 30, 180}, + { 20, 180}, + { 10, 180}, + { 00, 180}, + {-10, 180}, + {-20, 180}, +}; + +static struct abx500_battery_type bat_type_thermistor[] = { +[BATTERY_UNKNOWN] = { + /* First element always represent the UNKNOWN battery */ + .name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN, + .resis_high = 0, + .resis_low = 0, + .battery_resistance = 300, + .charge_full_design = 612, + .nominal_voltage = 3700, + .termination_vol = 4050, + .termination_curr = 200, + .recharge_vol = 3990, + .normal_cur_lvl = 400, + .normal_vol_lvl = 4100, + .maint_a_cur_lvl = 400, + .maint_a_vol_lvl = 4050, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 400, + .maint_b_vol_lvl = 4000, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl), + .r_to_t_tbl = temp_tbl, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl), + .v_to_cap_tbl = cap_tbl, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +{ + .name = POWER_SUPPLY_TECHNOLOGY_LIPO, + .resis_high = 53407, + .resis_low = 12500, + .battery_resistance = 300, + .charge_full_design = 900, + .nominal_voltage = 3600, + .termination_vol = 4150, + .termination_curr = 80, + .recharge_vol = 4130, + .normal_cur_lvl = 700, + .normal_vol_lvl = 4200, + .maint_a_cur_lvl = 600, + .maint_a_vol_lvl = 4150, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 600, + .maint_b_vol_lvl = 4100, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_A_thermistor), + .r_to_t_tbl = temp_tbl_A_thermistor, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_A_thermistor), + .v_to_cap_tbl = cap_tbl_A_thermistor, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, + +}, +{ + .name = POWER_SUPPLY_TECHNOLOGY_LIPO, + .resis_high = 165418, + .resis_low = 82869, + .battery_resistance = 300, + .charge_full_design = 900, + .nominal_voltage = 3600, + .termination_vol = 4150, + .termination_curr = 80, + .recharge_vol = 4130, + .normal_cur_lvl = 700, + .normal_vol_lvl = 4200, + .maint_a_cur_lvl = 600, + .maint_a_vol_lvl = 4150, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 600, + .maint_b_vol_lvl = 4100, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_B_thermistor), + .r_to_t_tbl = temp_tbl_B_thermistor, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_B_thermistor), + .v_to_cap_tbl = cap_tbl_B_thermistor, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +}; + +static struct abx500_battery_type bat_type_ext_thermistor[] = { +[BATTERY_UNKNOWN] = { + /* First element always represent the UNKNOWN battery */ + .name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN, + .resis_high = 0, + .resis_low = 0, + .battery_resistance = 300, + .charge_full_design = 612, + .nominal_voltage = 3700, + .termination_vol = 4050, + .termination_curr = 200, + .recharge_vol = 3990, + .normal_cur_lvl = 400, + .normal_vol_lvl = 4100, + .maint_a_cur_lvl = 400, + .maint_a_vol_lvl = 4050, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 400, + .maint_b_vol_lvl = 4000, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl), + .r_to_t_tbl = temp_tbl, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl), + .v_to_cap_tbl = cap_tbl, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +/* + * These are the batteries that doesn't have an internal NTC resistor to measure + * its temperature. The temperature in this case is measure with a NTC placed + * near the battery but on the PCB. + */ +{ + .name = POWER_SUPPLY_TECHNOLOGY_LIPO, + .resis_high = 76000, + .resis_low = 53000, + .battery_resistance = 300, + .charge_full_design = 900, + .nominal_voltage = 3700, + .termination_vol = 4150, + .termination_curr = 100, + .recharge_vol = 4130, + .normal_cur_lvl = 700, + .normal_vol_lvl = 4200, + .maint_a_cur_lvl = 600, + .maint_a_vol_lvl = 4150, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 600, + .maint_b_vol_lvl = 4100, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl), + .r_to_t_tbl = temp_tbl, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl), + .v_to_cap_tbl = cap_tbl, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +{ + .name = POWER_SUPPLY_TECHNOLOGY_LION, + .resis_high = 30000, + .resis_low = 10000, + .battery_resistance = 300, + .charge_full_design = 950, + .nominal_voltage = 3700, + .termination_vol = 4150, + .termination_curr = 100, + .recharge_vol = 4130, + .normal_cur_lvl = 700, + .normal_vol_lvl = 4200, + .maint_a_cur_lvl = 600, + .maint_a_vol_lvl = 4150, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 600, + .maint_b_vol_lvl = 4100, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl), + .r_to_t_tbl = temp_tbl, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl), + .v_to_cap_tbl = cap_tbl, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +{ + .name = POWER_SUPPLY_TECHNOLOGY_LION, + .resis_high = 95000, + .resis_low = 76001, + .battery_resistance = 300, + .charge_full_design = 950, + .nominal_voltage = 3700, + .termination_vol = 4150, + .termination_curr = 100, + .recharge_vol = 4130, + .normal_cur_lvl = 700, + .normal_vol_lvl = 4200, + .maint_a_cur_lvl = 600, + .maint_a_vol_lvl = 4150, + .maint_a_chg_timer_h = 60, + .maint_b_cur_lvl = 600, + .maint_b_vol_lvl = 4100, + .maint_b_chg_timer_h = 200, + .low_high_cur_lvl = 300, + .low_high_vol_lvl = 4000, + .n_temp_tbl_elements = ARRAY_SIZE(temp_tbl), + .r_to_t_tbl = temp_tbl, + .n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl), + .v_to_cap_tbl = cap_tbl, + .n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor), + .batres_tbl = temp_to_batres_tbl_thermistor, +}, +}; + +static const struct abx500_bm_capacity_levels cap_levels = { + .critical = 2, + .low = 10, + .normal = 70, + .high = 95, + .full = 100, +}; + +static const struct abx500_fg_parameters fg = { + .recovery_sleep_timer = 10, + .recovery_total_time = 100, + .init_timer = 1, + .init_discard_time = 5, + .init_total_time = 40, + .high_curr_time = 60, + .accu_charging = 30, + .accu_high_curr = 30, + .high_curr_threshold = 50, + .lowbat_threshold = 3100, + .battok_falling_th_sel0 = 2860, + .battok_raising_th_sel1 = 2860, + .user_cap_limit = 15, + .maint_thres = 97, +}; + +static const struct abx500_maxim_parameters maxi_params = { + .ena_maxi = true, + .chg_curr = 910, + .wait_cycles = 10, + .charger_curr_step = 100, +}; + +static const struct abx500_bm_charger_parameters chg = { + .usb_volt_max = 5500, + .usb_curr_max = 1500, + .ac_volt_max = 7500, + .ac_curr_max = 1500, +}; + +struct abx500_bm_data ab8500_bm_data = { + .temp_under = 3, + .temp_low = 8, + .temp_high = 43, + .temp_over = 48, + .main_safety_tmr_h = 4, + .temp_interval_chg = 20, + .temp_interval_nochg = 120, + .usb_safety_tmr_h = 4, + .bkup_bat_v = BUP_VCH_SEL_2P6V, + .bkup_bat_i = BUP_ICH_SEL_150UA, + .no_maintenance = false, + .adc_therm = ABx500_ADC_THERM_BATCTRL, + .chg_unknown_bat = false, + .enable_overshoot = false, + .fg_res = 100, + .cap_levels = &cap_levels, + .bat_type = bat_type_thermistor, + .n_btypes = 3, + .batt_id = 0, + .interval_charging = 5, + .interval_not_charging = 120, + .temp_hysteresis = 3, + .gnd_lift_resistance = 34, + .maxi = &maxi_params, + .chg_params = &chg, + .fg_params = &fg, +}; + +int __devinit +bmdevs_of_probe(struct device *dev, + struct device_node *np, + struct abx500_bm_data **battery) +{ + struct abx500_battery_type *btype; + struct device_node *np_bat_supply; + struct abx500_bm_data *bat; + const char *btech; + char bat_tech[8]; + int i, thermistor; + + *battery = &ab8500_bm_data; + + /* get phandle to 'battery-info' node */ + np_bat_supply = of_parse_phandle(np, "battery", 0); + if (!np_bat_supply) { + dev_err(dev, "missing property battery\n"); + return -EINVAL; + } + if (of_property_read_bool(np_bat_supply, + "thermistor-on-batctrl")) + thermistor = NTC_INTERNAL; + else + thermistor = NTC_EXTERNAL; + + bat = *battery; + if (thermistor == NTC_EXTERNAL) { + bat->n_btypes = 4; + bat->bat_type = bat_type_ext_thermistor; + bat->adc_therm = ABx500_ADC_THERM_BATTEMP; + } + btech = of_get_property(np_bat_supply, + "stericsson,battery-type", NULL); + if (!btech) { + dev_warn(dev, "missing property battery-name/type\n"); + strcpy(bat_tech, "UNKNOWN"); + } else { + strcpy(bat_tech, btech); + } + + if (strncmp(bat_tech, "LION", 4) == 0) { + bat->no_maintenance = true; + bat->chg_unknown_bat = true; + bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600; + bat->bat_type[BATTERY_UNKNOWN].termination_vol = 4150; + bat->bat_type[BATTERY_UNKNOWN].recharge_vol = 4130; + bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl = 520; + bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl = 4200; + } + /* select the battery resolution table */ + for (i = 0; i < bat->n_btypes; ++i) { + btype = (bat->bat_type + i); + if (thermistor == NTC_EXTERNAL) { + btype->batres_tbl = + temp_to_batres_tbl_ext_thermistor; + } else if (strncmp(bat_tech, "LION", 4) == 0) { + btype->batres_tbl = + temp_to_batres_tbl_9100; + } else { + btype->batres_tbl = + temp_to_batres_tbl_thermistor; + } + } + of_node_put(np_bat_supply); + return 0; +} diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index e3b6395b20dd..abc2abc16a5d 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -93,7 +93,7 @@ struct ab8500_btemp { struct ab8500 *parent; struct ab8500_gpadc *gpadc; struct ab8500_fg *fg; - struct abx500_btemp_platform_data *pdata; + struct abx500_bmdevs_plat_data *pdata; struct abx500_bm_data *bat; struct power_supply btemp_psy; struct ab8500_btemp_events events; @@ -962,10 +962,10 @@ static int __devexit ab8500_btemp_remove(struct platform_device *pdev) static int __devinit ab8500_btemp_probe(struct platform_device *pdev) { + struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data; + struct ab8500_btemp *di; int irq, i, ret = 0; u8 val; - struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data; - struct ab8500_btemp *di; if (!plat_data) { dev_err(&pdev->dev, "No platform data\n"); @@ -982,21 +982,13 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev) di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); /* get btemp specific platform data */ - di->pdata = plat_data->btemp; + di->pdata = plat_data; if (!di->pdata) { dev_err(di->dev, "no btemp platform data supplied\n"); ret = -EINVAL; goto free_device_info; } - /* get battery specific platform data */ - di->bat = plat_data->battery; - if (!di->bat) { - dev_err(di->dev, "no battery platform data supplied\n"); - ret = -EINVAL; - goto free_device_info; - } - /* BTEMP supply */ di->btemp_psy.name = "ab8500_btemp"; di->btemp_psy.type = POWER_SUPPLY_TYPE_BATTERY; diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 26ff759e2220..723edb47b1d8 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -220,7 +220,7 @@ struct ab8500_charger { bool autopower; struct ab8500 *parent; struct ab8500_gpadc *gpadc; - struct abx500_charger_platform_data *pdata; + struct abx500_bmdevs_plat_data *pdata; struct abx500_bm_data *bat; struct ab8500_charger_event_flags flags; struct ab8500_charger_usb_state usb_state; @@ -2533,9 +2533,9 @@ static int __devexit ab8500_charger_remove(struct platform_device *pdev) static int __devinit ab8500_charger_probe(struct platform_device *pdev) { - int irq, i, charger_status, ret = 0; - struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data; + struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data; struct ab8500_charger *di; + int irq, i, charger_status, ret = 0; if (!plat_data) { dev_err(&pdev->dev, "No platform data\n"); @@ -2555,21 +2555,13 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev) spin_lock_init(&di->usb_state.usb_lock); /* get charger specific platform data */ - di->pdata = plat_data->charger; + di->pdata = plat_data; if (!di->pdata) { dev_err(di->dev, "no charger platform data supplied\n"); ret = -EINVAL; goto free_device_info; } - /* get battery specific platform data */ - di->bat = plat_data->battery; - if (!di->bat) { - dev_err(di->dev, "no battery platform data supplied\n"); - ret = -EINVAL; - goto free_device_info; - } - di->autopower = false; /* AC supply */ diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 2db8cc254399..ed62ef788eb5 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -22,15 +22,16 @@ #include #include #include -#include -#include #include -#include #include -#include -#include #include +#include #include +#include +#include +#include +#include +#include #define MILLI_TO_MICRO 1000 #define FG_LSB_IN_MA 1627 @@ -172,7 +173,6 @@ struct inst_curr_result_list { * @avg_cap: Average capacity filter * @parent: Pointer to the struct ab8500 * @gpadc: Pointer to the struct gpadc - * @pdata: Pointer to the abx500_fg platform data * @bat: Pointer to the abx500_bm platform data * @fg_psy: Structure that holds the FG specific battery properties * @fg_wq: Work queue for running the FG algorithm @@ -212,7 +212,6 @@ struct ab8500_fg { struct ab8500_fg_avg_cap avg_cap; struct ab8500 *parent; struct ab8500_gpadc *gpadc; - struct abx500_fg_platform_data *pdata; struct abx500_bm_data *bat; struct power_supply fg_psy; struct workqueue_struct *fg_wq; @@ -2429,7 +2428,6 @@ static int __devexit ab8500_fg_remove(struct platform_device *pdev) flush_scheduled_work(); power_supply_unregister(&di->fg_psy); platform_set_drvdata(pdev, NULL); - kfree(di); return ret; } @@ -2442,21 +2440,39 @@ static struct ab8500_fg_interrupts ab8500_fg_irq[] = { {"CCEOC", ab8500_fg_cc_data_end_handler}, }; +static char *supply_interface[] = { + "ab8500_chargalg", + "ab8500_usb", +}; + static int __devinit ab8500_fg_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; + struct ab8500_fg *di; int i, irq; int ret = 0; - struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data; - struct ab8500_fg *di; - - if (!plat_data) { - dev_err(&pdev->dev, "No platform data\n"); - return -EINVAL; - } - di = kzalloc(sizeof(*di), GFP_KERNEL); - if (!di) + di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL); + if (!di) { + dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__); return -ENOMEM; + } + di->bat = pdev->mfd_cell->platform_data; + if (!di->bat) { + if (np) { + ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + if (ret) { + dev_err(&pdev->dev, + "failed to get battery information\n"); + return ret; + } + } else { + dev_err(&pdev->dev, "missing dt node for ab8500_fg\n"); + return -EINVAL; + } + } else { + dev_info(&pdev->dev, "falling back to legacy platform data\n"); + } mutex_init(&di->cc_lock); @@ -2465,29 +2481,13 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) di->parent = dev_get_drvdata(pdev->dev.parent); di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); - /* get fg specific platform data */ - di->pdata = plat_data->fg; - if (!di->pdata) { - dev_err(di->dev, "no fg platform data supplied\n"); - ret = -EINVAL; - goto free_device_info; - } - - /* get battery specific platform data */ - di->bat = plat_data->battery; - if (!di->bat) { - dev_err(di->dev, "no battery platform data supplied\n"); - ret = -EINVAL; - goto free_device_info; - } - di->fg_psy.name = "ab8500_fg"; di->fg_psy.type = POWER_SUPPLY_TYPE_BATTERY; di->fg_psy.properties = ab8500_fg_props; di->fg_psy.num_properties = ARRAY_SIZE(ab8500_fg_props); di->fg_psy.get_property = ab8500_fg_get_property; - di->fg_psy.supplied_to = di->pdata->supplied_to; - di->fg_psy.num_supplicants = di->pdata->num_supplicants; + di->fg_psy.supplied_to = supply_interface; + di->fg_psy.num_supplicants = ARRAY_SIZE(supply_interface), di->fg_psy.external_power_changed = ab8500_fg_external_power_changed; di->bat_cap.max_mah_design = MILLI_TO_MICRO * @@ -2506,8 +2506,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq"); if (di->fg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); - ret = -ENOMEM; - goto free_device_info; + return -ENOMEM; } /* Init work for running the fg algorithm instantly */ @@ -2606,12 +2605,14 @@ free_irq: } free_inst_curr_wq: destroy_workqueue(di->fg_wq); -free_device_info: - kfree(di); - return ret; } +static const struct of_device_id ab8500_fg_match[] = { + { .compatible = "stericsson,ab8500-fg", }, + { }, +}; + static struct platform_driver ab8500_fg_driver = { .probe = ab8500_fg_probe, .remove = __devexit_p(ab8500_fg_remove), @@ -2620,6 +2621,7 @@ static struct platform_driver ab8500_fg_driver = { .driver = { .name = "ab8500-fg", .owner = THIS_MODULE, + .of_match_table = ab8500_fg_match, }, }; diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index 4d302803ffcc..758ea76de2f8 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -231,7 +231,7 @@ struct abx500_chargalg { struct abx500_chargalg_charger_info chg_info; struct abx500_chargalg_battery_data batt_data; struct abx500_chargalg_suspension_status susp_status; - struct abx500_chargalg_platform_data *pdata; + struct abx500_bmdevs_plat_data *pdata; struct abx500_bm_data *bat; struct power_supply chargalg_psy; struct ux500_charger *ac_chg; @@ -1802,7 +1802,7 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev) static int __devinit abx500_chargalg_probe(struct platform_device *pdev) { - struct abx500_bm_plat_data *plat_data; + struct abx500_bmdevs_plat_data *plat_data; int ret = 0; struct abx500_chargalg *di = @@ -1812,10 +1812,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev) /* get device struct */ di->dev = &pdev->dev; - plat_data = pdev->dev.platform_data; - di->pdata = plat_data->chargalg; - di->bat = plat_data->battery; + di->pdata = plat_data; /* chargalg supply */ di->chargalg_psy.name = "abx500_chargalg"; diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 5d5298d56026..33f2c58554f4 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -267,39 +267,27 @@ struct abx500_bm_data { int gnd_lift_resistance; const struct abx500_maxim_parameters *maxi; const struct abx500_bm_capacity_levels *cap_levels; - const struct abx500_battery_type *bat_type; + struct abx500_battery_type *bat_type; const struct abx500_bm_charger_parameters *chg_params; const struct abx500_fg_parameters *fg_params; }; -struct abx500_chargalg_platform_data { - char **supplied_to; - size_t num_supplicants; -}; - -struct abx500_charger_platform_data { - char **supplied_to; - size_t num_supplicants; - bool autopower_cfg; -}; +extern struct abx500_bm_data ab8500_bm_data; -struct abx500_btemp_platform_data { - char **supplied_to; - size_t num_supplicants; +struct abx500_bmdevs_plat_data { + char **supplied_to; + size_t num_supplicants; + bool autopower_cfg; }; -struct abx500_fg_platform_data { - char **supplied_to; - size_t num_supplicants; +enum { + NTC_EXTERNAL = 0, + NTC_INTERNAL, }; -struct abx500_bm_plat_data { - struct abx500_bm_data *battery; - struct abx500_charger_platform_data *charger; - struct abx500_btemp_platform_data *btemp; - struct abx500_fg_platform_data *fg; - struct abx500_chargalg_platform_data *chargalg; -}; +int bmdevs_of_probe(struct device *dev, + struct device_node *np, + struct abx500_bm_data **battery); int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg, u8 value); -- cgit v1.2.3 From a12810ab9fcf0c9fd5e50b5e350a3ffbeaa571be Mon Sep 17 00:00:00 2001 From: "Rajanikanth H.V" Date: Wed, 31 Oct 2012 15:40:33 +0000 Subject: ab8500: Add devicetree support for chargalg This patch adds device tree support for charging algorithm driver Signed-off-by: Rajanikanth H.V Signed-off-by: Anton Vorontsov --- .../bindings/power_supply/ab8500/chargalg.txt | 16 +++++++ arch/arm/boot/dts/dbx5x0.dtsi | 5 ++ drivers/mfd/ab8500-core.c | 5 ++ drivers/power/abx500_chargalg.c | 54 +++++++++++++++------- include/linux/mfd/abx500.h | 6 --- 5 files changed, 64 insertions(+), 22 deletions(-) create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt new file mode 100644 index 000000000000..ef5328371122 --- /dev/null +++ b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt @@ -0,0 +1,16 @@ +=== AB8500 Charging Algorithm Driver === + +The properties below describes the node for chargalg driver. + +Required Properties: +- compatible = Shall be: "stericsson,ab8500-chargalg" +- battery = Shall be battery specific information + +Example: +ab8500_chargalg { + compatible = "stericsson,ab8500-chargalg"; + battery = <&ab8500_battery>; +}; + +For information on battery specific node, Ref: +Documentation/devicetree/bindings/power_supply/ab8500/fg.txt diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi index 1d4ad3098aeb..12a68af44903 100644 --- a/arch/arm/boot/dts/dbx5x0.dtsi +++ b/arch/arm/boot/dts/dbx5x0.dtsi @@ -373,6 +373,11 @@ vddadc-supply = <&ab8500_ldo_tvout_reg>; }; + ab8500_chargalg { + compatible = "stericsson,ab8500-chargalg"; + battery = <&ab8500_battery>; + }; + ab8500_usb { compatible = "stericsson,ab8500-usb"; interrupts = < 90 0x4 diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index c7a120bfd50a..5ec70f26b9d5 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -1071,8 +1071,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = { }, { .name = "ab8500-chargalg", + .of_compatible = "stericsson,ab8500-chargalg", .num_resources = ARRAY_SIZE(ab8500_chargalg_resources), .resources = ab8500_chargalg_resources, +#ifndef CONFIG_OF + .platform_data = &ab8500_bm_data, + .pdata_size = sizeof(ab8500_bm_data), +#endif }, }; diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index 758ea76de2f8..dcdc4393b9e7 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -205,7 +207,6 @@ enum maxim_ret { * @chg_info: information about connected charger types * @batt_data: data of the battery * @susp_status: current charger suspension status - * @pdata: pointer to the abx500_chargalg platform data * @bat: pointer to the abx500_bm platform data * @chargalg_psy: structure that holds the battery properties exposed by * the charging algorithm @@ -231,7 +232,6 @@ struct abx500_chargalg { struct abx500_chargalg_charger_info chg_info; struct abx500_chargalg_battery_data batt_data; struct abx500_chargalg_suspension_status susp_status; - struct abx500_bmdevs_plat_data *pdata; struct abx500_bm_data *bat; struct power_supply chargalg_psy; struct ux500_charger *ac_chg; @@ -1795,25 +1795,44 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev) flush_scheduled_work(); power_supply_unregister(&di->chargalg_psy); platform_set_drvdata(pdev, NULL); - kfree(di); return 0; } +static char *supply_interface[] = { + "ab8500_fg", +}; + static int __devinit abx500_chargalg_probe(struct platform_device *pdev) { - struct abx500_bmdevs_plat_data *plat_data; + struct device_node *np = pdev->dev.of_node; + struct abx500_chargalg *di; int ret = 0; - struct abx500_chargalg *di = - kzalloc(sizeof(struct abx500_chargalg), GFP_KERNEL); - if (!di) + di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL); + if (!di) { + dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__); return -ENOMEM; + } + di->bat = pdev->mfd_cell->platform_data; + if (!di->bat) { + if (np) { + ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + if (ret) { + dev_err(&pdev->dev, + "failed to get battery information\n"); + return ret; + } + } else { + dev_err(&pdev->dev, "missing dt node for ab8500_chargalg\n"); + return -EINVAL; + } + } else { + dev_info(&pdev->dev, "falling back to legacy platform data\n"); + } /* get device struct */ di->dev = &pdev->dev; - plat_data = pdev->dev.platform_data; - di->pdata = plat_data; /* chargalg supply */ di->chargalg_psy.name = "abx500_chargalg"; @@ -1821,8 +1840,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev) di->chargalg_psy.properties = abx500_chargalg_props; di->chargalg_psy.num_properties = ARRAY_SIZE(abx500_chargalg_props); di->chargalg_psy.get_property = abx500_chargalg_get_property; - di->chargalg_psy.supplied_to = di->pdata->supplied_to; - di->chargalg_psy.num_supplicants = di->pdata->num_supplicants; + di->chargalg_psy.supplied_to = supply_interface; + di->chargalg_psy.num_supplicants = ARRAY_SIZE(supply_interface), di->chargalg_psy.external_power_changed = abx500_chargalg_external_power_changed; @@ -1842,7 +1861,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev) create_singlethread_workqueue("abx500_chargalg_wq"); if (di->chargalg_wq == NULL) { dev_err(di->dev, "failed to create work queue\n"); - goto free_device_info; + return -ENOMEM; } /* Init work for chargalg */ @@ -1883,20 +1902,23 @@ free_psy: power_supply_unregister(&di->chargalg_psy); free_chargalg_wq: destroy_workqueue(di->chargalg_wq); -free_device_info: - kfree(di); - return ret; } +static const struct of_device_id ab8500_chargalg_match[] = { + { .compatible = "stericsson,ab8500-chargalg", }, + { }, +}; + static struct platform_driver abx500_chargalg_driver = { .probe = abx500_chargalg_probe, .remove = __devexit_p(abx500_chargalg_remove), .suspend = abx500_chargalg_suspend, .resume = abx500_chargalg_resume, .driver = { - .name = "abx500-chargalg", + .name = "ab8500-chargalg", .owner = THIS_MODULE, + .of_match_table = ab8500_chargalg_match, }, }; diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 33f2c58554f4..2138bd33021a 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -274,12 +274,6 @@ struct abx500_bm_data { extern struct abx500_bm_data ab8500_bm_data; -struct abx500_bmdevs_plat_data { - char **supplied_to; - size_t num_supplicants; - bool autopower_cfg; -}; - enum { NTC_EXTERNAL = 0, NTC_INTERNAL, -- cgit v1.2.3 From 0857ba3c24c308f42a242fe8a1894772750230ce Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 18 Nov 2012 18:36:19 +0200 Subject: i2c: i2c-cbus-gpio: introduce driver Add i2c driver to enable access to devices behind CBUS on Nokia Internet Tablets. The patch also adds CBUS I2C configuration for N8x0 which is one of the users of this driver. Acked-by: Felipe Balbi Acked-by: Tony Lindgren Signed-off-by: Aaro Koskinen Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-cbus-gpio.txt | 27 ++ arch/arm/mach-omap2/board-n8x0.c | 42 +++ drivers/i2c/busses/Kconfig | 10 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-cbus-gpio.c | 300 +++++++++++++++++++++ include/linux/platform_data/i2c-cbus-gpio.h | 27 ++ 6 files changed, 407 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt create mode 100644 drivers/i2c/busses/i2c-cbus-gpio.c create mode 100644 include/linux/platform_data/i2c-cbus-gpio.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt new file mode 100644 index 000000000000..8ce9cd2855b5 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt @@ -0,0 +1,27 @@ +Device tree bindings for i2c-cbus-gpio driver + +Required properties: + - compatible = "i2c-cbus-gpio"; + - gpios: clk, dat, sel + - #address-cells = <1>; + - #size-cells = <0>; + +Optional properties: + - child nodes conforming to i2c bus binding + +Example: + +i2c@0 { + compatible = "i2c-cbus-gpio"; + gpios = <&gpio 66 0 /* clk */ + &gpio 65 0 /* dat */ + &gpio 64 0 /* sel */ + >; + #address-cells = <1>; + #size-cells = <0>; + + retu-mfd: retu@1 { + compatible = "retu-mfd"; + reg = <0x1>; + }; +}; diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index d95f727ca39a..bbfd74263c42 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -39,6 +41,45 @@ #define TUSB6010_GPIO_ENABLE 0 #define TUSB6010_DMACHAN 0x3f +#if defined(CONFIG_I2C_CBUS_GPIO) || defined(CONFIG_I2C_CBUS_GPIO_MODULE) +static struct i2c_cbus_platform_data n8x0_cbus_data = { + .clk_gpio = 66, + .dat_gpio = 65, + .sel_gpio = 64, +}; + +static struct platform_device n8x0_cbus_device = { + .name = "i2c-cbus-gpio", + .id = 3, + .dev = { + .platform_data = &n8x0_cbus_data, + }, +}; + +static struct i2c_board_info n8x0_i2c_board_info_3[] __initdata = { + { + I2C_BOARD_INFO("retu-mfd", 0x01), + }, +}; + +static void __init n8x0_cbus_init(void) +{ + const int retu_irq_gpio = 108; + + if (gpio_request_one(retu_irq_gpio, GPIOF_IN, "Retu IRQ")) + return; + irq_set_irq_type(gpio_to_irq(retu_irq_gpio), IRQ_TYPE_EDGE_RISING); + n8x0_i2c_board_info_3[0].irq = gpio_to_irq(retu_irq_gpio); + i2c_register_board_info(3, n8x0_i2c_board_info_3, + ARRAY_SIZE(n8x0_i2c_board_info_3)); + platform_device_register(&n8x0_cbus_device); +} +#else /* CONFIG_I2C_CBUS_GPIO */ +static void __init n8x0_cbus_init(void) +{ +} +#endif /* CONFIG_I2C_CBUS_GPIO */ + #if defined(CONFIG_USB_MUSB_TUSB6010) || defined(CONFIG_USB_MUSB_TUSB6010_MODULE) /* * Enable or disable power to TUSB6010. When enabling, turn on 3.3 V and @@ -677,6 +718,7 @@ static void __init n8x0_init_machine(void) gpmc_onenand_init(board_onenand_data); n8x0_mmc_init(); n8x0_usb_init(); + n8x0_cbus_init(); } MACHINE_START(NOKIA_N800, "Nokia N800") diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e9df4612b7eb..e949edf644d4 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -337,6 +337,16 @@ config I2C_BLACKFIN_TWI_CLK_KHZ help The unit of the TWI clock is kHz. +config I2C_CBUS_GPIO + tristate "CBUS I2C driver" + depends on GENERIC_GPIO + help + Support for CBUS access using I2C API. Mostly relevant for Nokia + Internet Tablets (770, N800 and N810). + + This driver can also be built as a module. If so, the module + will be called i2c-cbus-gpio. + config I2C_CPM tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)" depends on (CPM1 || CPM2) && OF_I2C diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 395b516ffa08..f9e3e0b5c827 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_I2C_POWERMAC) += i2c-powermac.o obj-$(CONFIG_I2C_AT91) += i2c-at91.o obj-$(CONFIG_I2C_AU1550) += i2c-au1550.o obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o +obj-$(CONFIG_I2C_CBUS_GPIO) += i2c-cbus-gpio.o obj-$(CONFIG_I2C_CPM) += i2c-cpm.o obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o obj-$(CONFIG_I2C_DESIGNWARE_CORE) += i2c-designware-core.o diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c new file mode 100644 index 000000000000..98386d659318 --- /dev/null +++ b/drivers/i2c/busses/i2c-cbus-gpio.c @@ -0,0 +1,300 @@ +/* + * CBUS I2C driver for Nokia Internet Tablets. + * + * Copyright (C) 2004-2010 Nokia Corporation + * + * Based on code written by Juha Yrjölä, David Weinehall, Mikko Ylinen and + * Felipe Balbi. Converted to I2C driver by Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Bit counts are derived from Nokia implementation. These should be checked + * if other CBUS implementations appear. + */ +#define CBUS_ADDR_BITS 3 +#define CBUS_REG_BITS 5 + +struct cbus_host { + spinlock_t lock; /* host lock */ + struct device *dev; + int clk_gpio; + int dat_gpio; + int sel_gpio; +}; + +/** + * cbus_send_bit - sends one bit over the bus + * @host: the host we're using + * @bit: one bit of information to send + */ +static void cbus_send_bit(struct cbus_host *host, unsigned bit) +{ + gpio_set_value(host->dat_gpio, bit ? 1 : 0); + gpio_set_value(host->clk_gpio, 1); + gpio_set_value(host->clk_gpio, 0); +} + +/** + * cbus_send_data - sends @len amount of data over the bus + * @host: the host we're using + * @data: the data to send + * @len: size of the transfer + */ +static void cbus_send_data(struct cbus_host *host, unsigned data, unsigned len) +{ + int i; + + for (i = len; i > 0; i--) + cbus_send_bit(host, data & (1 << (i - 1))); +} + +/** + * cbus_receive_bit - receives one bit from the bus + * @host: the host we're using + */ +static int cbus_receive_bit(struct cbus_host *host) +{ + int ret; + + gpio_set_value(host->clk_gpio, 1); + ret = gpio_get_value(host->dat_gpio); + gpio_set_value(host->clk_gpio, 0); + return ret; +} + +/** + * cbus_receive_word - receives 16-bit word from the bus + * @host: the host we're using + */ +static int cbus_receive_word(struct cbus_host *host) +{ + int ret = 0; + int i; + + for (i = 16; i > 0; i--) { + int bit = cbus_receive_bit(host); + + if (bit < 0) + return bit; + + if (bit) + ret |= 1 << (i - 1); + } + return ret; +} + +/** + * cbus_transfer - transfers data over the bus + * @host: the host we're using + * @rw: read/write flag + * @dev: device address + * @reg: register address + * @data: if @rw == I2C_SBUS_WRITE data to send otherwise 0 + */ +static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev, + unsigned reg, unsigned data) +{ + unsigned long flags; + int ret; + + /* We don't want interrupts disturbing our transfer */ + spin_lock_irqsave(&host->lock, flags); + + /* Reset state and start of transfer, SEL stays down during transfer */ + gpio_set_value(host->sel_gpio, 0); + + /* Set the DAT pin to output */ + gpio_direction_output(host->dat_gpio, 1); + + /* Send the device address */ + cbus_send_data(host, dev, CBUS_ADDR_BITS); + + /* Send the rw flag */ + cbus_send_bit(host, rw == I2C_SMBUS_READ); + + /* Send the register address */ + cbus_send_data(host, reg, CBUS_REG_BITS); + + if (rw == I2C_SMBUS_WRITE) { + cbus_send_data(host, data, 16); + ret = 0; + } else { + ret = gpio_direction_input(host->dat_gpio); + if (ret) { + dev_dbg(host->dev, "failed setting direction\n"); + goto out; + } + gpio_set_value(host->clk_gpio, 1); + + ret = cbus_receive_word(host); + if (ret < 0) { + dev_dbg(host->dev, "failed receiving data\n"); + goto out; + } + } + + /* Indicate end of transfer, SEL goes up until next transfer */ + gpio_set_value(host->sel_gpio, 1); + gpio_set_value(host->clk_gpio, 1); + gpio_set_value(host->clk_gpio, 0); + +out: + spin_unlock_irqrestore(&host->lock, flags); + + return ret; +} + +static int cbus_i2c_smbus_xfer(struct i2c_adapter *adapter, + u16 addr, + unsigned short flags, + char read_write, + u8 command, + int size, + union i2c_smbus_data *data) +{ + struct cbus_host *chost = i2c_get_adapdata(adapter); + int ret; + + if (size != I2C_SMBUS_WORD_DATA) + return -EINVAL; + + ret = cbus_transfer(chost, read_write == I2C_SMBUS_READ, addr, + command, data->word); + if (ret < 0) + return ret; + + if (read_write == I2C_SMBUS_READ) + data->word = ret; + + return 0; +} + +static u32 cbus_i2c_func(struct i2c_adapter *adapter) +{ + return I2C_FUNC_SMBUS_READ_WORD_DATA | I2C_FUNC_SMBUS_WRITE_WORD_DATA; +} + +static const struct i2c_algorithm cbus_i2c_algo = { + .smbus_xfer = cbus_i2c_smbus_xfer, + .functionality = cbus_i2c_func, +}; + +static int cbus_i2c_remove(struct platform_device *pdev) +{ + struct i2c_adapter *adapter = platform_get_drvdata(pdev); + + return i2c_del_adapter(adapter); +} + +static int cbus_i2c_probe(struct platform_device *pdev) +{ + struct i2c_adapter *adapter; + struct cbus_host *chost; + int ret; + + adapter = devm_kzalloc(&pdev->dev, sizeof(struct i2c_adapter), + GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + chost = devm_kzalloc(&pdev->dev, sizeof(*chost), GFP_KERNEL); + if (!chost) + return -ENOMEM; + + if (pdev->dev.of_node) { + struct device_node *dnode = pdev->dev.of_node; + if (of_gpio_count(dnode) != 3) + return -ENODEV; + chost->clk_gpio = of_get_gpio(dnode, 0); + chost->dat_gpio = of_get_gpio(dnode, 1); + chost->sel_gpio = of_get_gpio(dnode, 2); + } else if (pdev->dev.platform_data) { + struct i2c_cbus_platform_data *pdata = pdev->dev.platform_data; + chost->clk_gpio = pdata->clk_gpio; + chost->dat_gpio = pdata->dat_gpio; + chost->sel_gpio = pdata->sel_gpio; + } else { + return -ENODEV; + } + + adapter->owner = THIS_MODULE; + adapter->class = I2C_CLASS_HWMON; + adapter->dev.parent = &pdev->dev; + adapter->nr = pdev->id; + adapter->timeout = HZ; + adapter->algo = &cbus_i2c_algo; + strlcpy(adapter->name, "CBUS I2C adapter", sizeof(adapter->name)); + + spin_lock_init(&chost->lock); + chost->dev = &pdev->dev; + + ret = devm_gpio_request_one(&pdev->dev, chost->clk_gpio, + GPIOF_OUT_INIT_LOW, "CBUS clk"); + if (ret) + return ret; + + ret = devm_gpio_request_one(&pdev->dev, chost->dat_gpio, GPIOF_IN, + "CBUS data"); + if (ret) + return ret; + + ret = devm_gpio_request_one(&pdev->dev, chost->sel_gpio, + GPIOF_OUT_INIT_HIGH, "CBUS sel"); + if (ret) + return ret; + + i2c_set_adapdata(adapter, chost); + platform_set_drvdata(pdev, adapter); + + return i2c_add_numbered_adapter(adapter); +} + +#if defined(CONFIG_OF) +static const struct of_device_id i2c_cbus_dt_ids[] = { + { .compatible = "i2c-cbus-gpio", }, + { } +}; +MODULE_DEVICE_TABLE(of, i2c_cbus_dt_ids); +#endif + +static struct platform_driver cbus_i2c_driver = { + .probe = cbus_i2c_probe, + .remove = cbus_i2c_remove, + .driver = { + .owner = THIS_MODULE, + .name = "i2c-cbus-gpio", + }, +}; +module_platform_driver(cbus_i2c_driver); + +MODULE_ALIAS("platform:i2c-cbus-gpio"); +MODULE_DESCRIPTION("CBUS I2C driver"); +MODULE_AUTHOR("Juha Yrjölä"); +MODULE_AUTHOR("David Weinehall"); +MODULE_AUTHOR("Mikko Ylinen"); +MODULE_AUTHOR("Felipe Balbi"); +MODULE_AUTHOR("Aaro Koskinen "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h new file mode 100644 index 000000000000..6faa992a9502 --- /dev/null +++ b/include/linux/platform_data/i2c-cbus-gpio.h @@ -0,0 +1,27 @@ +/* + * i2c-cbus-gpio.h - CBUS I2C platform_data definition + * + * Copyright (C) 2004-2009 Nokia Corporation + * + * Written by Felipe Balbi and Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H +#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H + +struct i2c_cbus_platform_data { + int dat_gpio; + int clk_gpio; + int sel_gpio; +}; + +#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */ -- cgit v1.2.3 From 49f4d8b93ccf9454284b6f524b96c66d8d7fbccc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Aug 2012 04:25:10 -0700 Subject: pidns: Capture the user namespace and filter ns_last_pid - Capture the the user namespace that creates the pid namespace - Use that user namespace to test if it is ok to write to /proc/sys/kernel/ns_last_pid. Zhao Hongjiang noticed I was missing a put_user_ns in when destroying a pid_ns. I have foloded his patch into this one so that bisects will work properly. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/pid_namespace.h | 8 +++++--- kernel/nsproxy.c | 2 +- kernel/pid.c | 1 + kernel/pid_namespace.c | 17 ++++++++++++----- 4 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 65e3e87eacc5..c89c9cfcd247 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,6 +31,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + struct user_namespace *user_ns; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ @@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); +extern struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); @@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -static inline struct pid_namespace * -copy_pid_ns(unsigned long flags, struct pid_namespace *ns) +static inline struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) ns = ERR_PTR(-EINVAL); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 7e1c3de1ce45..ca27d2c5264d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); + new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk)); if (IS_ERR(new_nsp->pid_ns)) { err = PTR_ERR(new_nsp->pid_ns); goto out_pid; diff --git a/kernel/pid.c b/kernel/pid.c index aebd4f5aaf41..2a624f1486e1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = { .last_pid = 0, .level = 0, .child_reaper = &init_task, + .user_ns = &init_user_ns, }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0dfb75..b2604950aa50 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -74,7 +75,8 @@ err_alloc: /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 -static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) +static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, + struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; @@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p kref_init(&ns->kref); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); + ns->user_ns = get_user_ns(user_ns); set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); @@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p out_put_parent_pid_ns: put_pid_ns(parent_pid_ns); + put_user_ns(user_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: @@ -131,16 +135,18 @@ static void destroy_pid_namespace(struct pid_namespace *ns) for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); + put_user_ns(ns->user_ns); kmem_cache_free(pid_ns_cachep, ns); } -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) +struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *old_ns) { if (!(flags & CLONE_NEWPID)) return get_pid_ns(old_ns); if (flags & (CLONE_THREAD|CLONE_PARENT)) return ERR_PTR(-EINVAL); - return create_pid_namespace(old_ns); + return create_pid_namespace(user_ns, old_ns); } static void free_pid_ns(struct kref *kref) @@ -239,9 +245,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) static int pid_ns_ctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct pid_namespace *pid_ns = task_active_pid_ns(current); struct ctl_table tmp = *table; - if (write && !capable(CAP_SYS_ADMIN)) + if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -250,7 +257,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, * it should synchronize its usage with external means. */ - tmp.data = ¤t->nsproxy->pid_ns->last_pid; + tmp.data = &pid_ns->last_pid; return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); } -- cgit v1.2.3 From 0a01f2cc390e10633a54f72c608cc3fe19a50c3d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Aug 2012 10:33:47 -0700 Subject: pidns: Make the pidns proc mount/umount logic obvious. Track the number of pids in the proc hash table. When the number of pids goes to 0 schedule work to unmount the kernel mount of proc. Move the mount of proc into alloc_pid when we allocate the pid for init. Remove the surprising calls of pid_ns_release proc in fork and proc_flush_task. Those code paths really shouldn't know about proc namespace implementation details and people have demonstrated several times that finding and understanding those code paths is difficult and non-obvious. Because of the call path detach pid is alwasy called with the rtnl_lock held free_pid is not allowed to sleep, so the work to unmounting proc is moved to a work queue. This has the side benefit of not blocking the entire world waiting for the unnecessary rcu_barrier in deactivate_locked_super. In the process of making the code clear and obvious this fixes a bug reported by Gao feng where we would leak a mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns succeeded and copy_net_ns failed. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- fs/proc/base.c | 4 ---- fs/proc/root.c | 5 ----- include/linux/pid_namespace.h | 2 ++ kernel/fork.c | 2 -- kernel/pid.c | 21 +++++++++++++++++---- kernel/pid_namespace.c | 14 +++++++------- 6 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 6177fc238fdb..7621dc51cff8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2590,10 +2590,6 @@ void proc_flush_task(struct task_struct *task) proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, tgid->numbers[i].nr); } - - upid = &pid->numbers[pid->level]; - if (upid->nr == 1) - pid_ns_release_proc(upid->ns); } static struct dentry *proc_pid_instantiate(struct inode *dir, diff --git a/fs/proc/root.c b/fs/proc/root.c index fc1609321a78..f2f251158d35 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -155,11 +155,6 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - err = pid_ns_prepare_proc(&init_pid_ns); - if (err) { - unregister_filesystem(&proc_fs_type); - return; - } proc_self_init(); proc_symlink("mounts", NULL, "self/mounts"); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c89c9cfcd247..4c96acdb2489 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,6 +21,7 @@ struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; + int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -32,6 +33,7 @@ struct pid_namespace { struct bsd_acct_struct *bacct; #endif struct user_namespace *user_ns; + struct work_struct proc_work; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ diff --git a/kernel/fork.c b/kernel/fork.c index 7798c247f4b9..666dc8b06606 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1476,8 +1476,6 @@ bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: - if (unlikely(clone_flags & CLONE_NEWPID)) - pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) diff --git a/kernel/pid.c b/kernel/pid.c index 3a5f238c1ca0..e957f8b09136 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -36,6 +36,7 @@ #include #include #include +#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) @@ -270,8 +271,12 @@ void free_pid(struct pid *pid) unsigned long flags; spin_lock_irqsave(&pidmap_lock, flags); - for (i = 0; i <= pid->level; i++) - hlist_del_rcu(&pid->numbers[i].pid_chain); + for (i = 0; i <= pid->level; i++) { + struct upid *upid = pid->numbers + i; + hlist_del_rcu(&upid->pid_chain); + if (--upid->ns->nr_hashed == 0) + schedule_work(&upid->ns->proc_work); + } spin_unlock_irqrestore(&pidmap_lock, flags); for (i = 0; i <= pid->level; i++) @@ -293,6 +298,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) goto out; tmp = ns; + pid->level = ns->level; for (i = ns->level; i >= 0; i--) { nr = alloc_pidmap(tmp); if (nr < 0) @@ -303,17 +309,23 @@ struct pid *alloc_pid(struct pid_namespace *ns) tmp = tmp->parent; } + if (unlikely(is_child_reaper(pid))) { + if (pid_ns_prepare_proc(ns)) + goto out_free; + } + get_pid_ns(ns); - pid->level = ns->level; atomic_set(&pid->count, 1); for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); - for ( ; upid >= pid->numbers; --upid) + for ( ; upid >= pid->numbers; --upid) { hlist_add_head_rcu(&upid->pid_chain, &pid_hash[pid_hashfn(upid->nr, upid->ns)]); + upid->ns->nr_hashed++; + } spin_unlock_irq(&pidmap_lock); out: @@ -570,6 +582,7 @@ void __init pidmap_init(void) /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); + init_pid_ns.nr_hashed = 1; init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index b2604950aa50..84591cfeefc1 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -72,6 +72,12 @@ err_alloc: return NULL; } +static void proc_cleanup_work(struct work_struct *work) +{ + struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); + pid_ns_release_proc(ns); +} + /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 @@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); + INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); @@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns for (i = 1; i < PIDMAP_ENTRIES; i++) atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - err = pid_ns_prepare_proc(ns); - if (err) - goto out_put_parent_pid_ns; - return ns; -out_put_parent_pid_ns: - put_pid_ns(parent_pid_ns); - put_user_ns(user_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: -- cgit v1.2.3 From 57e8391d327609cbf12d843259c968b9e5c1838f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:17:03 -0800 Subject: pidns: Add setns support - Pid namespaces are designed to be inescapable so verify that the passed in pid namespace is a child of the currently active pid namespace or the currently active pid namespace itself. Allowing the currently active pid namespace is important so the effects of an earlier setns can be cancelled. Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + kernel/pid_namespace.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b178ed733c36..85ca047e35f1 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -24,6 +24,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_IPC_NS &ipcns_operations, #endif +#ifdef CONFIG_PID_NS + &pidns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3fd2e871ff1b..acaafcd40aa5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -251,6 +251,7 @@ struct proc_ns_operations { extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; +extern const struct proc_ns_operations pidns_operations; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 0dbbc66b6ec6..f78fc48c86bc 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -301,6 +301,60 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) return 0; } +static void *pidns_get(struct task_struct *task) +{ + struct pid_namespace *ns; + + rcu_read_lock(); + ns = get_pid_ns(task_active_pid_ns(task)); + rcu_read_unlock(); + + return ns; +} + +static void pidns_put(void *ns) +{ + put_pid_ns(ns); +} + +static int pidns_install(struct nsproxy *nsproxy, void *ns) +{ + struct pid_namespace *active = task_active_pid_ns(current); + struct pid_namespace *ancestor, *new = ns; + + if (!ns_capable(new->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + /* + * Only allow entering the current active pid namespace + * or a child of the current active pid namespace. + * + * This is required for fork to return a usable pid value and + * this maintains the property that processes and their + * children can not escape their current pid namespace. + */ + if (new->level < active->level) + return -EINVAL; + + ancestor = new; + while (ancestor->level > active->level) + ancestor = ancestor->parent; + if (ancestor != active) + return -EINVAL; + + put_pid_ns(nsproxy->pid_ns); + nsproxy->pid_ns = get_pid_ns(new); + return 0; +} + +const struct proc_ns_operations pidns_operations = { + .name = "pid", + .type = CLONE_NEWPID, + .get = pidns_get, + .put = pidns_put, + .install = pidns_install, +}; + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); -- cgit v1.2.3 From 8823c079ba7136dc1948d6f6dcb5f8022bde438e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:49:36 -0800 Subject: vfs: Add setns support for the mount namespace setns support for the mount namespace is a little tricky as an arbitrary decision must be made about what to set fs->root and fs->pwd to, as there is no expectation of a relationship between the two mount namespaces. Therefore I arbitrarily find the root mount point, and follow every mount on top of it to find the top of the mount stack. Then I set fs->root and fs->pwd to that location. The topmost root of the mount stack seems like a reasonable place to be. Bind mount support for the mount namespace inodes has the possibility of creating circular dependencies between mount namespaces. Circular dependencies can result in loops that prevent mount namespaces from every being freed. I avoid creating those circular dependencies by adding a sequence number to the mount namespace and require all bind mounts be of a younger mount namespace into an older mount namespace. Add a helper function proc_ns_inode so it is possible to detect when we are attempting to bind mound a namespace inode. Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/mount.h | 1 + fs/namespace.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/namespaces.c | 5 +++ include/linux/proc_fs.h | 7 ++++ 4 files changed, 108 insertions(+) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index 4f291f9de641..e9c37dd3d00d 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -6,6 +6,7 @@ struct mnt_namespace { atomic_t count; struct mount * root; struct list_head list; + u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; int event; }; diff --git a/fs/namespace.c b/fs/namespace.c index 24960626bb6b..d287e7e74644 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -20,6 +20,7 @@ #include /* get_fs_root et.al. */ #include /* fsnotify_vfsmount_delete */ #include +#include #include "pnode.h" #include "internal.h" @@ -1308,6 +1309,26 @@ static int mount_is_safe(struct path *path) #endif } +static bool mnt_ns_loop(struct path *path) +{ + /* Could bind mounting the mount namespace inode cause a + * mount namespace loop? + */ + struct inode *inode = path->dentry->d_inode; + struct proc_inode *ei; + struct mnt_namespace *mnt_ns; + + if (!proc_ns_inode(inode)) + return false; + + ei = PROC_I(inode); + if (ei->ns_ops != &mntns_operations) + return false; + + mnt_ns = ei->ns; + return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; +} + struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, int flag) { @@ -1655,6 +1676,10 @@ static int do_loopback(struct path *path, const char *old_name, if (err) return err; + err = -EINVAL; + if (mnt_ns_loop(&old_path)) + goto out; + err = lock_mount(path); if (err) goto out; @@ -2261,6 +2286,15 @@ dput_out: return retval; } +/* + * Assign a sequence number so we can detect when we attempt to bind + * mount a reference to an older mount namespace into the current + * mount namespace, preventing reference counting loops. A 64bit + * number incrementing at 10Ghz will take 12,427 years to wrap which + * is effectively never, so we can ignore the possibility. + */ +static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); + static struct mnt_namespace *alloc_mnt_ns(void) { struct mnt_namespace *new_ns; @@ -2268,6 +2302,7 @@ static struct mnt_namespace *alloc_mnt_ns(void) new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); + new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); @@ -2681,3 +2716,63 @@ bool our_mnt(struct vfsmount *mnt) { return check_mnt(real_mount(mnt)); } + +static void *mntns_get(struct task_struct *task) +{ + struct mnt_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) { + ns = nsproxy->mnt_ns; + get_mnt_ns(ns); + } + rcu_read_unlock(); + + return ns; +} + +static void mntns_put(void *ns) +{ + put_mnt_ns(ns); +} + +static int mntns_install(struct nsproxy *nsproxy, void *ns) +{ + struct fs_struct *fs = current->fs; + struct mnt_namespace *mnt_ns = ns; + struct path root; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT)) + return -EINVAL; + + if (fs->users != 1) + return -EINVAL; + + get_mnt_ns(mnt_ns); + put_mnt_ns(nsproxy->mnt_ns); + nsproxy->mnt_ns = mnt_ns; + + /* Find the root */ + root.mnt = &mnt_ns->root->mnt; + root.dentry = mnt_ns->root->mnt.mnt_root; + path_get(&root); + while(d_mountpoint(root.dentry) && follow_down_one(&root)) + ; + + /* Update the pwd and root */ + set_fs_pwd(fs, &root); + set_fs_root(fs, &root); + + path_put(&root); + return 0; +} + +const struct proc_ns_operations mntns_operations = { + .name = "mnt", + .type = CLONE_NEWNS, + .get = mntns_get, + .put = mntns_put, + .install = mntns_install, +}; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 85ca047e35f1..2a17fd9ae6a9 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -27,6 +27,7 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_PID_NS &pidns_operations, #endif + &mntns_operations, }; static const struct file_operations ns_file_operations = { @@ -201,3 +202,7 @@ out_invalid: return ERR_PTR(-EINVAL); } +bool proc_ns_inode(struct inode *inode) +{ + return inode->i_fop == &ns_file_operations; +} diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index acaafcd40aa5..9014c041e752 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -174,6 +174,7 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); extern struct file *proc_ns_fget(int fd); +extern bool proc_ns_inode(struct inode *inode); #else @@ -229,6 +230,11 @@ static inline struct file *proc_ns_fget(int fd) return ERR_PTR(-EINVAL); } +static inline bool proc_ns_inode(struct inode *inode) +{ + return false; +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) @@ -252,6 +258,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations mntns_operations; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); -- cgit v1.2.3 From 771b1371686e0a63e938ada28de020b9a0040f55 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 21:08:32 -0700 Subject: vfs: Add a user namespace reference from struct mnt_namespace This will allow for support for unprivileged mounts in a new user namespace. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- fs/mount.h | 1 + fs/namespace.c | 24 ++++++++++++++++-------- include/linux/mnt_namespace.h | 3 ++- kernel/nsproxy.c | 2 +- 4 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index e9c37dd3d00d..630fafc616bb 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -6,6 +6,7 @@ struct mnt_namespace { atomic_t count; struct mount * root; struct list_head list; + struct user_namespace *user_ns; u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; int event; diff --git a/fs/namespace.c b/fs/namespace.c index d287e7e74644..207c7ba84ad3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2286,6 +2287,12 @@ dput_out: return retval; } +static void free_mnt_ns(struct mnt_namespace *ns) +{ + put_user_ns(ns->user_ns); + kfree(ns); +} + /* * Assign a sequence number so we can detect when we attempt to bind * mount a reference to an older mount namespace into the current @@ -2295,7 +2302,7 @@ dput_out: */ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); -static struct mnt_namespace *alloc_mnt_ns(void) +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) { struct mnt_namespace *new_ns; @@ -2308,6 +2315,7 @@ static struct mnt_namespace *alloc_mnt_ns(void) INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); new_ns->event = 0; + new_ns->user_ns = get_user_ns(user_ns); return new_ns; } @@ -2316,7 +2324,7 @@ static struct mnt_namespace *alloc_mnt_ns(void) * copied from the namespace of the passed in task structure. */ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, - struct fs_struct *fs) + struct user_namespace *user_ns, struct fs_struct *fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; @@ -2324,7 +2332,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct mount *old = mnt_ns->root; struct mount *new; - new_ns = alloc_mnt_ns(); + new_ns = alloc_mnt_ns(user_ns); if (IS_ERR(new_ns)) return new_ns; @@ -2333,7 +2341,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); if (IS_ERR(new)) { up_write(&namespace_sem); - kfree(new_ns); + free_mnt_ns(new_ns); return ERR_CAST(new); } new_ns->root = new; @@ -2374,7 +2382,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, } struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, - struct fs_struct *new_fs) + struct user_namespace *user_ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; @@ -2384,7 +2392,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, if (!(flags & CLONE_NEWNS)) return ns; - new_ns = dup_mnt_ns(ns, new_fs); + new_ns = dup_mnt_ns(ns, user_ns, new_fs); put_mnt_ns(ns); return new_ns; @@ -2396,7 +2404,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, */ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) { - struct mnt_namespace *new_ns = alloc_mnt_ns(); + struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); if (!IS_ERR(new_ns)) { struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; @@ -2682,7 +2690,7 @@ void put_mnt_ns(struct mnt_namespace *ns) br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); - kfree(ns); + free_mnt_ns(ns); } struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 5a8e3903d770..12b2ab510323 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -4,9 +4,10 @@ struct mnt_namespace; struct fs_struct; +struct user_namespace; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, - struct fs_struct *); + struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); extern const struct file_operations proc_mounts_operations; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b8d4d8709d70..7f8b051fc19f 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -66,7 +66,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, if (!new_nsp) return ERR_PTR(-ENOMEM); - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); + new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs); if (IS_ERR(new_nsp->mnt_ns)) { err = PTR_ERR(new_nsp->mnt_ns); goto out_ns; -- cgit v1.2.3 From 0c55cfc4166d9a0f38de779bd4d75a90afbe7734 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 21:42:03 -0700 Subject: vfs: Allow unprivileged manipulation of the mount namespace. - Add a filesystem flag to mark filesystems that are safe to mount as an unprivileged user. - Add a filesystem flag to mark filesystems that don't need MNT_NODEV when mounted by an unprivileged user. - Relax the permission checks to allow unprivileged users that have CAP_SYS_ADMIN permissions in the user namespace referred to by the current mount namespace to be allowed to mount, unmount, and move filesystems. Acked-by: "Serge E. Hallyn" Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 69 ++++++++++++++++++++++++++++++++++-------------------- include/linux/fs.h | 2 ++ 2 files changed, 45 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 4dfcaf05d17c..9ddc86f93221 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1269,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) goto dput_and_out; retval = do_umount(mnt, flags); @@ -1295,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static int mount_is_safe(struct path *path) { - if (capable(CAP_SYS_ADMIN)) + if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return 0; return -EPERM; #ifdef notyet @@ -1633,7 +1633,7 @@ static int do_change_type(struct path *path, int flag) int type; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1797,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name) struct mount *p; struct mount *old; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1884,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return ERR_PTR(err); } -static struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) -{ - struct file_system_type *type = get_fs_type(fstype); - struct vfsmount *mnt; - if (!type) - return ERR_PTR(-ENODEV); - mnt = vfs_kern_mount(type, flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); - put_filesystem(type); - return mnt; -} - /* * add a mount into a namespace's mount tree */ @@ -1944,20 +1929,46 @@ unlock: * create a new mount for userspace and request it to be added into the * namespace's tree */ -static int do_new_mount(struct path *path, const char *type, int flags, +static int do_new_mount(struct path *path, const char *fstype, int flags, int mnt_flags, const char *name, void *data) { + struct file_system_type *type; + struct user_namespace *user_ns; struct vfsmount *mnt; int err; - if (!type) + if (!fstype) return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + user_ns = real_mount(path->mnt)->mnt_ns->user_ns; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; - mnt = do_kern_mount(type, flags, name, data); + type = get_fs_type(fstype); + if (!type) + return -ENODEV; + + if (user_ns != &init_user_ns) { + if (!(type->fs_flags & FS_USERNS_MOUNT)) { + put_filesystem(type); + return -EPERM; + } + /* Only in special cases allow devices from mounts + * created outside the initial user namespace. + */ + if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { + flags |= MS_NODEV; + mnt_flags |= MNT_NODEV; + } + } + + mnt = vfs_kern_mount(type, flags, name, data); + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && + !mnt->mnt_sb->s_subtype) + mnt = fs_set_subtype(mnt, fstype); + + put_filesystem(type); if (IS_ERR(mnt)) return PTR_ERR(mnt); @@ -2549,7 +2560,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, struct mount *new_mnt, *root_mnt; int error; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; error = user_path_dir(new_root, &new); @@ -2631,8 +2642,13 @@ static void __init init_mount_tree(void) struct vfsmount *mnt; struct mnt_namespace *ns; struct path root; + struct file_system_type *type; - mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); + type = get_fs_type("rootfs"); + if (!type) + panic("Can't find rootfs type"); + mnt = vfs_kern_mount(type, 0, "rootfs", NULL); + put_filesystem(type); if (IS_ERR(mnt)) panic("Can't create rootfs"); @@ -2757,7 +2773,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) struct mnt_namespace *mnt_ns = ns; struct path root; - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT)) + if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_CHROOT)) return -EINVAL; if (fs->users != 1) diff --git a/include/linux/fs.h b/include/linux/fs.h index b33cfc97b9ca..5037aa6817fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1812,6 +1812,8 @@ struct file_system_type { #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 +#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ +#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, -- cgit v1.2.3 From 4b20db3de8dab005b07c74161cb041db8c5ff3a7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 6 Nov 2012 11:31:49 +0000 Subject: kref: Implement kref_get_unless_zero v3 This function is intended to simplify locking around refcounting for objects that can be looked up from a lookup structure, and which are removed from that lookup structure in the object destructor. Operations on such objects require at least a read lock around lookup + kref_get, and a write lock around kref_put + remove from lookup structure. Furthermore, RCU implementations become extremely tricky. With a lookup followed by a kref_get_unless_zero *with return value check* locking in the kref_put path can be deferred to the actual removal from the lookup structure and RCU lookups become trivial. v2: Formatting fixes. v3: Invert the return value. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/linux/kref.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 65af6887872f..4972e6e9ca93 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -111,4 +111,25 @@ static inline int kref_put_mutex(struct kref *kref, } return 0; } + +/** + * kref_get_unless_zero - Increment refcount for object unless it is zero. + * @kref: object. + * + * Return non-zero if the increment succeeded. Otherwise return 0. + * + * This function is intended to simplify locking around refcounting for + * objects that can be looked up from a lookup structure, and which are + * removed from that lookup structure in the object destructor. + * Operations on such objects require at least a read lock around + * lookup + kref_get, and a write lock around kref_put + remove from lookup + * structure. Furthermore, RCU implementations become extremely tricky. + * With a lookup followed by a kref_get_unless_zero *with return value check* + * locking in the kref_put path can be deferred to the actual removal from + * the lookup structure and RCU lookups become trivial. + */ +static inline int __must_check kref_get_unless_zero(struct kref *kref) +{ + return atomic_add_unless(&kref->refcount, 1, 0); +} #endif /* _KREF_H_ */ -- cgit v1.2.3 From d851718f65d646c5033a28fa60631440c6dc0d4f Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 8 Nov 2012 18:39:41 +0900 Subject: extcon: Add missing header file to extcon.h This patch add missing header file(sysfs.h/device.h) to extcon.h because 'struct extcon_dev' define attribute field(attr_g_muex/ attrs_muex/d_attrs_mues) and device_type field(extcon_dev_type). Signed-off-by: Chanwoo Choi Signed-off-by: Myungjoo Ham --- include/linux/extcon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 2c26c14cd710..54c00ce9ce7a 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -23,7 +23,9 @@ #ifndef __LINUX_EXTCON_H__ #define __LINUX_EXTCON_H__ +#include #include +#include #define SUPPORTED_CABLE_MAX 32 #define CABLE_NAME_MAX 30 -- cgit v1.2.3 From 43c1af0f4861b721def8c67ed6af2a69a4efcca3 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 13 Nov 2012 19:33:57 +0530 Subject: mfd: tps65910: Use regmap irq framework for interrupt support Implement irq support of tps65910 with regmap irq framework in place of implementing locally. This reduces the code size significantly and easy to maintain. Signed-off-by: Laxman Dewangan Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/tps65910-irq.c | 375 +++++++++++++++++++++---------------------- include/linux/mfd/tps65910.h | 139 +++++++++++----- 2 files changed, 278 insertions(+), 236 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c index 09aab3e4776d..554543a584a1 100644 --- a/drivers/mfd/tps65910-irq.c +++ b/drivers/mfd/tps65910-irq.c @@ -24,171 +24,184 @@ #include #include -/* - * This is a threaded IRQ handler so can access I2C/SPI. Since all - * interrupts are clear on read the IRQ line will be reasserted and - * the physical IRQ will be handled again if another interrupt is - * asserted while we run - in the normal course of events this is a - * rare occurrence so we save I2C/SPI reads. We're also assuming that - * it's rare to get lots of interrupts firing simultaneously so try to - * minimise I/O. - */ -static irqreturn_t tps65910_irq(int irq, void *irq_data) -{ - struct tps65910 *tps65910 = irq_data; - unsigned int reg; - u32 irq_sts; - u32 irq_mask; - int i; - - tps65910_reg_read(tps65910, TPS65910_INT_STS, ®); - irq_sts = reg; - tps65910_reg_read(tps65910, TPS65910_INT_STS2, ®); - irq_sts |= reg << 8; - switch (tps65910_chip_id(tps65910)) { - case TPS65911: - tps65910_reg_read(tps65910, TPS65910_INT_STS3, ®); - irq_sts |= reg << 16; - } - - tps65910_reg_read(tps65910, TPS65910_INT_MSK, ®); - irq_mask = reg; - tps65910_reg_read(tps65910, TPS65910_INT_MSK2, ®); - irq_mask |= reg << 8; - switch (tps65910_chip_id(tps65910)) { - case TPS65911: - tps65910_reg_read(tps65910, TPS65910_INT_MSK3, ®); - irq_mask |= reg << 16; - } - - irq_sts &= ~irq_mask; - - if (!irq_sts) - return IRQ_NONE; - - for (i = 0; i < tps65910->irq_num; i++) { - - if (!(irq_sts & (1 << i))) - continue; - - handle_nested_irq(irq_find_mapping(tps65910->domain, i)); - } - - /* Write the STS register back to clear IRQs we handled */ - reg = irq_sts & 0xFF; - irq_sts >>= 8; - tps65910_reg_write(tps65910, TPS65910_INT_STS, reg); - reg = irq_sts & 0xFF; - tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg); - switch (tps65910_chip_id(tps65910)) { - case TPS65911: - reg = irq_sts >> 8; - tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg); - } - - return IRQ_HANDLED; -} - -static void tps65910_irq_lock(struct irq_data *data) -{ - struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - - mutex_lock(&tps65910->irq_lock); -} - -static void tps65910_irq_sync_unlock(struct irq_data *data) -{ - struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - u32 reg_mask; - unsigned int reg; - - tps65910_reg_read(tps65910, TPS65910_INT_MSK, ®); - reg_mask = reg; - tps65910_reg_read(tps65910, TPS65910_INT_MSK2, ®); - reg_mask |= reg << 8; - switch (tps65910_chip_id(tps65910)) { - case TPS65911: - tps65910_reg_read(tps65910, TPS65910_INT_MSK3, ®); - reg_mask |= reg << 16; - } - if (tps65910->irq_mask != reg_mask) { - reg = tps65910->irq_mask & 0xFF; - tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg); - reg = tps65910->irq_mask >> 8 & 0xFF; - tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg); - switch (tps65910_chip_id(tps65910)) { - case TPS65911: - reg = tps65910->irq_mask >> 16; - tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg); - } - } - mutex_unlock(&tps65910->irq_lock); -} - -static void tps65910_irq_enable(struct irq_data *data) -{ - struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - - tps65910->irq_mask &= ~(1 << data->hwirq); -} - -static void tps65910_irq_disable(struct irq_data *data) -{ - struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - - tps65910->irq_mask |= (1 << data->hwirq); -} +static const struct regmap_irq tps65911_irqs[] = { + /* INT_STS */ + [TPS65911_IRQ_PWRHOLD_F] = { + .mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_VBAT_VMHI] = { + .mask = INT_MSK_VMBHI_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRON] = { + .mask = INT_MSK_PWRON_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRON_LP] = { + .mask = INT_MSK_PWRON_LP_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRHOLD_R] = { + .mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_HOTDIE] = { + .mask = INT_MSK_HOTDIE_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_RTC_ALARM] = { + .mask = INT_MSK_RTC_ALARM_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_RTC_PERIOD] = { + .mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [TPS65911_IRQ_GPIO0_R] = { + .mask = INT_MSK2_GPIO0_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO0_F] = { + .mask = INT_MSK2_GPIO0_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO1_R] = { + .mask = INT_MSK2_GPIO1_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO1_F] = { + .mask = INT_MSK2_GPIO1_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO2_R] = { + .mask = INT_MSK2_GPIO2_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO2_F] = { + .mask = INT_MSK2_GPIO2_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO3_R] = { + .mask = INT_MSK2_GPIO3_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO3_F] = { + .mask = INT_MSK2_GPIO3_F_IT_MSK_MASK, + .reg_offset = 1, + }, + + /* INT_STS3 */ + [TPS65911_IRQ_GPIO4_R] = { + .mask = INT_MSK3_GPIO4_R_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO4_F] = { + .mask = INT_MSK3_GPIO4_F_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO5_R] = { + .mask = INT_MSK3_GPIO5_R_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO5_F] = { + .mask = INT_MSK3_GPIO5_F_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_WTCHDG] = { + .mask = INT_MSK3_WTCHDG_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_VMBCH2_H] = { + .mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_VMBCH2_L] = { + .mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_PWRDN] = { + .mask = INT_MSK3_PWRDN_IT_MSK_MASK, + .reg_offset = 2, + }, +}; -#ifdef CONFIG_PM_SLEEP -static int tps65910_irq_set_wake(struct irq_data *data, unsigned int enable) -{ - struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data); - return irq_set_irq_wake(tps65910->chip_irq, enable); -} -#else -#define tps65910_irq_set_wake NULL -#endif +static const struct regmap_irq tps65910_irqs[] = { + /* INT_STS */ + [TPS65910_IRQ_VBAT_VMBDCH] = { + .mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_VBAT_VMHI] = { + .mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRON] = { + .mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRON_LP] = { + .mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRHOLD] = { + .mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_HOTDIE] = { + .mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_RTC_ALARM] = { + .mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_RTC_PERIOD] = { + .mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [TPS65910_IRQ_GPIO_R] = { + .mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65910_IRQ_GPIO_F] = { + .mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK, + .reg_offset = 1, + }, +}; -static struct irq_chip tps65910_irq_chip = { +static struct regmap_irq_chip tps65911_irq_chip = { .name = "tps65910", - .irq_bus_lock = tps65910_irq_lock, - .irq_bus_sync_unlock = tps65910_irq_sync_unlock, - .irq_disable = tps65910_irq_disable, - .irq_enable = tps65910_irq_enable, - .irq_set_wake = tps65910_irq_set_wake, + .irqs = tps65911_irqs, + .num_irqs = ARRAY_SIZE(tps65911_irqs), + .num_regs = 3, + .irq_reg_stride = 2, + .status_base = TPS65910_INT_STS, + .mask_base = TPS65910_INT_MSK, + .ack_base = TPS65910_INT_MSK, }; -static int tps65910_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - struct tps65910 *tps65910 = h->host_data; - - irq_set_chip_data(virq, tps65910); - irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq); - irq_set_nested_thread(virq, 1); - - /* ARM needs us to explicitly flag the IRQ as valid - * and will set them noprobe when we do so. */ -#ifdef CONFIG_ARM - set_irq_flags(virq, IRQF_VALID); -#else - irq_set_noprobe(virq); -#endif - - return 0; -} - -static struct irq_domain_ops tps65910_domain_ops = { - .map = tps65910_irq_map, - .xlate = irq_domain_xlate_twocell, +static struct regmap_irq_chip tps65910_irq_chip = { + .name = "tps65910", + .irqs = tps65910_irqs, + .num_irqs = ARRAY_SIZE(tps65910_irqs), + .num_regs = 2, + .irq_reg_stride = 2, + .status_base = TPS65910_INT_STS, + .mask_base = TPS65910_INT_MSK, + .ack_base = TPS65910_INT_MSK, }; int tps65910_irq_init(struct tps65910 *tps65910, int irq, struct tps65910_platform_data *pdata) { - int ret; - int flags = IRQF_ONESHOT; + int ret = 0; + static struct regmap_irq_chip *tps6591x_irqs_chip; if (!irq) { dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n"); @@ -200,61 +213,31 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq, return -EINVAL; } + switch (tps65910_chip_id(tps65910)) { case TPS65910: - tps65910->irq_num = TPS65910_NUM_IRQ; + tps6591x_irqs_chip = &tps65910_irq_chip; break; case TPS65911: - tps65910->irq_num = TPS65911_NUM_IRQ; + tps6591x_irqs_chip = &tps65911_irq_chip; break; } - if (pdata->irq_base > 0) { - pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0, - tps65910->irq_num, -1); - if (pdata->irq_base < 0) { - dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n", - pdata->irq_base); - return pdata->irq_base; - } - } - - tps65910->irq_mask = 0xFFFFFF; - - mutex_init(&tps65910->irq_lock); tps65910->chip_irq = irq; - tps65910->irq_base = pdata->irq_base; - - if (pdata->irq_base > 0) - tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node, - tps65910->irq_num, - pdata->irq_base, - 0, - &tps65910_domain_ops, tps65910); - else - tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node, - tps65910->irq_num, - &tps65910_domain_ops, tps65910); - - if (!tps65910->domain) { - dev_err(tps65910->dev, "Failed to create IRQ domain\n"); - return -ENOMEM; + ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq, + IRQF_ONESHOT, pdata->irq_base, + tps6591x_irqs_chip, &tps65910->irq_data); + if (ret < 0) { + dev_warn(tps65910->dev, + "Failed to add irq_chip %d\n", ret); + return ret; } - - ret = request_threaded_irq(irq, NULL, tps65910_irq, flags, - "tps65910", tps65910); - - irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); - - if (ret != 0) - dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret); - return ret; } int tps65910_irq_exit(struct tps65910 *tps65910) { - if (tps65910->chip_irq) - free_irq(tps65910->chip_irq, tps65910); + if (tps65910->chip_irq > 0) + regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data); return 0; } diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 02e894f3ff45..b564ac29590a 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -572,6 +572,49 @@ #define SPARE_SPARE_MASK 0xFF #define SPARE_SPARE_SHIFT 0 +#define TPS65910_INT_STS_RTC_PERIOD_IT_MASK 0x80 +#define TPS65910_INT_STS_RTC_PERIOD_IT_SHIFT 7 +#define TPS65910_INT_STS_RTC_ALARM_IT_MASK 0x40 +#define TPS65910_INT_STS_RTC_ALARM_IT_SHIFT 6 +#define TPS65910_INT_STS_HOTDIE_IT_MASK 0x20 +#define TPS65910_INT_STS_HOTDIE_IT_SHIFT 5 +#define TPS65910_INT_STS_PWRHOLD_F_IT_MASK 0x10 +#define TPS65910_INT_STS_PWRHOLD_F_IT_SHIFT 4 +#define TPS65910_INT_STS_PWRON_LP_IT_MASK 0x08 +#define TPS65910_INT_STS_PWRON_LP_IT_SHIFT 3 +#define TPS65910_INT_STS_PWRON_IT_MASK 0x04 +#define TPS65910_INT_STS_PWRON_IT_SHIFT 2 +#define TPS65910_INT_STS_VMBHI_IT_MASK 0x02 +#define TPS65910_INT_STS_VMBHI_IT_SHIFT 1 +#define TPS65910_INT_STS_VMBDCH_IT_MASK 0x01 +#define TPS65910_INT_STS_VMBDCH_IT_SHIFT 0 + +#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK 0x80 +#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_SHIFT 7 +#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK 0x40 +#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_SHIFT 6 +#define TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK 0x20 +#define TPS65910_INT_MSK_HOTDIE_IT_MSK_SHIFT 5 +#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK 0x10 +#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_SHIFT 4 +#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK 0x08 +#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_SHIFT 3 +#define TPS65910_INT_MSK_PWRON_IT_MSK_MASK 0x04 +#define TPS65910_INT_MSK_PWRON_IT_MSK_SHIFT 2 +#define TPS65910_INT_MSK_VMBHI_IT_MSK_MASK 0x02 +#define TPS65910_INT_MSK_VMBHI_IT_MSK_SHIFT 1 +#define TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK 0x01 +#define TPS65910_INT_MSK_VMBDCH_IT_MSK_SHIFT 0 + +#define TPS65910_INT_STS2_GPIO0_F_IT_SHIFT 2 +#define TPS65910_INT_STS2_GPIO0_F_IT_MASK 0x02 +#define TPS65910_INT_STS2_GPIO0_R_IT_SHIFT 1 +#define TPS65910_INT_STS2_GPIO0_R_IT_MASK 0x01 + +#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_SHIFT 2 +#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK 0x02 +#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_SHIFT 1 +#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK 0x01 /*Register INT_STS (0x80) register.RegisterDescription */ #define INT_STS_RTC_PERIOD_IT_MASK 0x80 @@ -580,16 +623,16 @@ #define INT_STS_RTC_ALARM_IT_SHIFT 6 #define INT_STS_HOTDIE_IT_MASK 0x20 #define INT_STS_HOTDIE_IT_SHIFT 5 -#define INT_STS_PWRHOLD_IT_MASK 0x10 -#define INT_STS_PWRHOLD_IT_SHIFT 4 +#define INT_STS_PWRHOLD_R_IT_MASK 0x10 +#define INT_STS_PWRHOLD_R_IT_SHIFT 4 #define INT_STS_PWRON_LP_IT_MASK 0x08 #define INT_STS_PWRON_LP_IT_SHIFT 3 #define INT_STS_PWRON_IT_MASK 0x04 #define INT_STS_PWRON_IT_SHIFT 2 #define INT_STS_VMBHI_IT_MASK 0x02 #define INT_STS_VMBHI_IT_SHIFT 1 -#define INT_STS_VMBDCH_IT_MASK 0x01 -#define INT_STS_VMBDCH_IT_SHIFT 0 +#define INT_STS_PWRHOLD_F_IT_MASK 0x01 +#define INT_STS_PWRHOLD_F_IT_SHIFT 0 /*Register INT_MSK (0x80) register.RegisterDescription */ @@ -599,16 +642,16 @@ #define INT_MSK_RTC_ALARM_IT_MSK_SHIFT 6 #define INT_MSK_HOTDIE_IT_MSK_MASK 0x20 #define INT_MSK_HOTDIE_IT_MSK_SHIFT 5 -#define INT_MSK_PWRHOLD_IT_MSK_MASK 0x10 -#define INT_MSK_PWRHOLD_IT_MSK_SHIFT 4 +#define INT_MSK_PWRHOLD_R_IT_MSK_MASK 0x10 +#define INT_MSK_PWRHOLD_R_IT_MSK_SHIFT 4 #define INT_MSK_PWRON_LP_IT_MSK_MASK 0x08 #define INT_MSK_PWRON_LP_IT_MSK_SHIFT 3 #define INT_MSK_PWRON_IT_MSK_MASK 0x04 #define INT_MSK_PWRON_IT_MSK_SHIFT 2 #define INT_MSK_VMBHI_IT_MSK_MASK 0x02 #define INT_MSK_VMBHI_IT_MSK_SHIFT 1 -#define INT_MSK_VMBDCH_IT_MSK_MASK 0x01 -#define INT_MSK_VMBDCH_IT_MSK_SHIFT 0 +#define INT_MSK_PWRHOLD_F_IT_MSK_MASK 0x01 +#define INT_MSK_PWRHOLD_F_IT_MSK_SHIFT 0 /*Register INT_STS2 (0x80) register.RegisterDescription */ @@ -650,6 +693,14 @@ /*Register INT_STS3 (0x80) register.RegisterDescription */ +#define INT_STS3_PWRDN_IT_MASK 0x80 +#define INT_STS3_PWRDN_IT_SHIFT 7 +#define INT_STS3_VMBCH2_L_IT_MASK 0x40 +#define INT_STS3_VMBCH2_L_IT_SHIFT 6 +#define INT_STS3_VMBCH2_H_IT_MASK 0x20 +#define INT_STS3_VMBCH2_H_IT_SHIFT 5 +#define INT_STS3_WTCHDG_IT_MASK 0x10 +#define INT_STS3_WTCHDG_IT_SHIFT 4 #define INT_STS3_GPIO5_F_IT_MASK 0x08 #define INT_STS3_GPIO5_F_IT_SHIFT 3 #define INT_STS3_GPIO5_R_IT_MASK 0x04 @@ -661,6 +712,14 @@ /*Register INT_MSK3 (0x80) register.RegisterDescription */ +#define INT_MSK3_PWRDN_IT_MSK_MASK 0x80 +#define INT_MSK3_PWRDN_IT_MSK_SHIFT 7 +#define INT_MSK3_VMBCH2_L_IT_MSK_MASK 0x40 +#define INT_MSK3_VMBCH2_L_IT_MSK_SHIFT 6 +#define INT_MSK3_VMBCH2_H_IT_MSK_MASK 0x20 +#define INT_MSK3_VMBCH2_H_IT_MSK_SHIFT 5 +#define INT_MSK3_WTCHDG_IT_MSK_MASK 0x10 +#define INT_MSK3_WTCHDG_IT_MSK_SHIFT 4 #define INT_MSK3_GPIO5_F_IT_MSK_MASK 0x08 #define INT_MSK3_GPIO5_F_IT_MSK_SHIFT 3 #define INT_MSK3_GPIO5_R_IT_MSK_MASK 0x04 @@ -721,34 +780,32 @@ #define TPS65910_IRQ_GPIO_F 9 #define TPS65910_NUM_IRQ 10 -#define TPS65911_IRQ_VBAT_VMBDCH 0 -#define TPS65911_IRQ_VBAT_VMBDCH2L 1 -#define TPS65911_IRQ_VBAT_VMBDCH2H 2 -#define TPS65911_IRQ_VBAT_VMHI 3 -#define TPS65911_IRQ_PWRON 4 -#define TPS65911_IRQ_PWRON_LP 5 -#define TPS65911_IRQ_PWRHOLD_F 6 -#define TPS65911_IRQ_PWRHOLD_R 7 -#define TPS65911_IRQ_HOTDIE 8 -#define TPS65911_IRQ_RTC_ALARM 9 -#define TPS65911_IRQ_RTC_PERIOD 10 -#define TPS65911_IRQ_GPIO0_R 11 -#define TPS65911_IRQ_GPIO0_F 12 -#define TPS65911_IRQ_GPIO1_R 13 -#define TPS65911_IRQ_GPIO1_F 14 -#define TPS65911_IRQ_GPIO2_R 15 -#define TPS65911_IRQ_GPIO2_F 16 -#define TPS65911_IRQ_GPIO3_R 17 -#define TPS65911_IRQ_GPIO3_F 18 -#define TPS65911_IRQ_GPIO4_R 19 -#define TPS65911_IRQ_GPIO4_F 20 -#define TPS65911_IRQ_GPIO5_R 21 -#define TPS65911_IRQ_GPIO5_F 22 -#define TPS65911_IRQ_WTCHDG 23 -#define TPS65911_IRQ_PWRDN 24 - -#define TPS65911_NUM_IRQ 25 - +#define TPS65911_IRQ_PWRHOLD_F 0 +#define TPS65911_IRQ_VBAT_VMHI 1 +#define TPS65911_IRQ_PWRON 2 +#define TPS65911_IRQ_PWRON_LP 3 +#define TPS65911_IRQ_PWRHOLD_R 4 +#define TPS65911_IRQ_HOTDIE 5 +#define TPS65911_IRQ_RTC_ALARM 6 +#define TPS65911_IRQ_RTC_PERIOD 7 +#define TPS65911_IRQ_GPIO0_R 8 +#define TPS65911_IRQ_GPIO0_F 9 +#define TPS65911_IRQ_GPIO1_R 10 +#define TPS65911_IRQ_GPIO1_F 11 +#define TPS65911_IRQ_GPIO2_R 12 +#define TPS65911_IRQ_GPIO2_F 13 +#define TPS65911_IRQ_GPIO3_R 14 +#define TPS65911_IRQ_GPIO3_F 15 +#define TPS65911_IRQ_GPIO4_R 16 +#define TPS65911_IRQ_GPIO4_F 17 +#define TPS65911_IRQ_GPIO5_R 18 +#define TPS65911_IRQ_GPIO5_F 19 +#define TPS65911_IRQ_WTCHDG 20 +#define TPS65911_IRQ_VMBCH2_H 21 +#define TPS65911_IRQ_VMBCH2_L 22 +#define TPS65911_IRQ_PWRDN 23 + +#define TPS65911_NUM_IRQ 24 /* GPIO Register Definitions */ #define TPS65910_GPIO_DEB BIT(2) @@ -848,11 +905,8 @@ struct tps65910 { struct tps65910_board *of_plat_data; /* IRQ Handling */ - struct mutex irq_lock; int chip_irq; - int irq_base; - int irq_num; - u32 irq_mask; + struct regmap_irq_chip_data *irq_data; struct irq_domain *domain; }; @@ -900,4 +954,9 @@ static inline int tps65910_reg_update_bits(struct tps65910 *tps65910, u8 reg, return regmap_update_bits(tps65910->regmap, reg, mask, val); } +static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq) +{ + return regmap_irq_get_virq(tps65910->irq_data, irq); +} + #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From 4aab3fadad32ff4df05832beff7c16fd6ad938aa Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 13 Nov 2012 19:33:58 +0530 Subject: mfd: tps65910: Move interrupt implementation code to mfd file In place of implementing the irq support in separate file, moving implementation to main mfd file. The irq files only contains the table and init steps only and does not need extra file to have this only for this purpose. Signed-off-by: Laxman Dewangan Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Makefile | 2 +- drivers/mfd/tps65910-irq.c | 243 ------------------------------------------- drivers/mfd/tps65910.c | 216 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65910.h | 4 - 4 files changed, 217 insertions(+), 248 deletions(-) delete mode 100644 drivers/mfd/tps65910-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 8a68fc7ea870..a30c49ecdd95 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -56,7 +56,7 @@ obj-$(CONFIG_TPS6105X) += tps6105x.o obj-$(CONFIG_TPS65010) += tps65010.o obj-$(CONFIG_TPS6507X) += tps6507x.o obj-$(CONFIG_MFD_TPS65217) += tps65217.o -obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65910-irq.o +obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65912-objs := tps65912-core.o tps65912-irq.o obj-$(CONFIG_MFD_TPS65912) += tps65912.o obj-$(CONFIG_MFD_TPS65912_I2C) += tps65912-i2c.o diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c deleted file mode 100644 index 554543a584a1..000000000000 --- a/drivers/mfd/tps65910-irq.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * tps65910-irq.c -- TI TPS6591x - * - * Copyright 2010 Texas Instruments Inc. - * - * Author: Graeme Gregory - * Author: Jorge Eduardo Candelaria - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -static const struct regmap_irq tps65911_irqs[] = { - /* INT_STS */ - [TPS65911_IRQ_PWRHOLD_F] = { - .mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_VBAT_VMHI] = { - .mask = INT_MSK_VMBHI_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_PWRON] = { - .mask = INT_MSK_PWRON_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_PWRON_LP] = { - .mask = INT_MSK_PWRON_LP_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_PWRHOLD_R] = { - .mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_HOTDIE] = { - .mask = INT_MSK_HOTDIE_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_RTC_ALARM] = { - .mask = INT_MSK_RTC_ALARM_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65911_IRQ_RTC_PERIOD] = { - .mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK, - .reg_offset = 0, - }, - - /* INT_STS2 */ - [TPS65911_IRQ_GPIO0_R] = { - .mask = INT_MSK2_GPIO0_R_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO0_F] = { - .mask = INT_MSK2_GPIO0_F_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO1_R] = { - .mask = INT_MSK2_GPIO1_R_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO1_F] = { - .mask = INT_MSK2_GPIO1_F_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO2_R] = { - .mask = INT_MSK2_GPIO2_R_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO2_F] = { - .mask = INT_MSK2_GPIO2_F_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO3_R] = { - .mask = INT_MSK2_GPIO3_R_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65911_IRQ_GPIO3_F] = { - .mask = INT_MSK2_GPIO3_F_IT_MSK_MASK, - .reg_offset = 1, - }, - - /* INT_STS3 */ - [TPS65911_IRQ_GPIO4_R] = { - .mask = INT_MSK3_GPIO4_R_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_GPIO4_F] = { - .mask = INT_MSK3_GPIO4_F_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_GPIO5_R] = { - .mask = INT_MSK3_GPIO5_R_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_GPIO5_F] = { - .mask = INT_MSK3_GPIO5_F_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_WTCHDG] = { - .mask = INT_MSK3_WTCHDG_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_VMBCH2_H] = { - .mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_VMBCH2_L] = { - .mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK, - .reg_offset = 2, - }, - [TPS65911_IRQ_PWRDN] = { - .mask = INT_MSK3_PWRDN_IT_MSK_MASK, - .reg_offset = 2, - }, -}; - -static const struct regmap_irq tps65910_irqs[] = { - /* INT_STS */ - [TPS65910_IRQ_VBAT_VMBDCH] = { - .mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_VBAT_VMHI] = { - .mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_PWRON] = { - .mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_PWRON_LP] = { - .mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_PWRHOLD] = { - .mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_HOTDIE] = { - .mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_RTC_ALARM] = { - .mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK, - .reg_offset = 0, - }, - [TPS65910_IRQ_RTC_PERIOD] = { - .mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK, - .reg_offset = 0, - }, - - /* INT_STS2 */ - [TPS65910_IRQ_GPIO_R] = { - .mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK, - .reg_offset = 1, - }, - [TPS65910_IRQ_GPIO_F] = { - .mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK, - .reg_offset = 1, - }, -}; - -static struct regmap_irq_chip tps65911_irq_chip = { - .name = "tps65910", - .irqs = tps65911_irqs, - .num_irqs = ARRAY_SIZE(tps65911_irqs), - .num_regs = 3, - .irq_reg_stride = 2, - .status_base = TPS65910_INT_STS, - .mask_base = TPS65910_INT_MSK, - .ack_base = TPS65910_INT_MSK, -}; - -static struct regmap_irq_chip tps65910_irq_chip = { - .name = "tps65910", - .irqs = tps65910_irqs, - .num_irqs = ARRAY_SIZE(tps65910_irqs), - .num_regs = 2, - .irq_reg_stride = 2, - .status_base = TPS65910_INT_STS, - .mask_base = TPS65910_INT_MSK, - .ack_base = TPS65910_INT_MSK, -}; - -int tps65910_irq_init(struct tps65910 *tps65910, int irq, - struct tps65910_platform_data *pdata) -{ - int ret = 0; - static struct regmap_irq_chip *tps6591x_irqs_chip; - - if (!irq) { - dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n"); - return -EINVAL; - } - - if (!pdata) { - dev_warn(tps65910->dev, "No interrupt support, no pdata\n"); - return -EINVAL; - } - - - switch (tps65910_chip_id(tps65910)) { - case TPS65910: - tps6591x_irqs_chip = &tps65910_irq_chip; - break; - case TPS65911: - tps6591x_irqs_chip = &tps65911_irq_chip; - break; - } - - tps65910->chip_irq = irq; - ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq, - IRQF_ONESHOT, pdata->irq_base, - tps6591x_irqs_chip, &tps65910->irq_data); - if (ret < 0) { - dev_warn(tps65910->dev, - "Failed to add irq_chip %d\n", ret); - return ret; - } - return ret; -} - -int tps65910_irq_exit(struct tps65910 *tps65910) -{ - if (tps65910->chip_irq > 0) - regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data); - return 0; -} diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index 27fbbe510101..d4d4eb5b5b6f 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -50,6 +53,219 @@ static struct mfd_cell tps65910s[] = { }; +static const struct regmap_irq tps65911_irqs[] = { + /* INT_STS */ + [TPS65911_IRQ_PWRHOLD_F] = { + .mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_VBAT_VMHI] = { + .mask = INT_MSK_VMBHI_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRON] = { + .mask = INT_MSK_PWRON_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRON_LP] = { + .mask = INT_MSK_PWRON_LP_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_PWRHOLD_R] = { + .mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_HOTDIE] = { + .mask = INT_MSK_HOTDIE_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_RTC_ALARM] = { + .mask = INT_MSK_RTC_ALARM_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65911_IRQ_RTC_PERIOD] = { + .mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [TPS65911_IRQ_GPIO0_R] = { + .mask = INT_MSK2_GPIO0_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO0_F] = { + .mask = INT_MSK2_GPIO0_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO1_R] = { + .mask = INT_MSK2_GPIO1_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO1_F] = { + .mask = INT_MSK2_GPIO1_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO2_R] = { + .mask = INT_MSK2_GPIO2_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO2_F] = { + .mask = INT_MSK2_GPIO2_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO3_R] = { + .mask = INT_MSK2_GPIO3_R_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65911_IRQ_GPIO3_F] = { + .mask = INT_MSK2_GPIO3_F_IT_MSK_MASK, + .reg_offset = 1, + }, + + /* INT_STS2 */ + [TPS65911_IRQ_GPIO4_R] = { + .mask = INT_MSK3_GPIO4_R_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO4_F] = { + .mask = INT_MSK3_GPIO4_F_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO5_R] = { + .mask = INT_MSK3_GPIO5_R_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_GPIO5_F] = { + .mask = INT_MSK3_GPIO5_F_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_WTCHDG] = { + .mask = INT_MSK3_WTCHDG_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_VMBCH2_H] = { + .mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_VMBCH2_L] = { + .mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK, + .reg_offset = 2, + }, + [TPS65911_IRQ_PWRDN] = { + .mask = INT_MSK3_PWRDN_IT_MSK_MASK, + .reg_offset = 2, + }, +}; + +static const struct regmap_irq tps65910_irqs[] = { + /* INT_STS */ + [TPS65910_IRQ_VBAT_VMBDCH] = { + .mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_VBAT_VMHI] = { + .mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRON] = { + .mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRON_LP] = { + .mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_PWRHOLD] = { + .mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_HOTDIE] = { + .mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_RTC_ALARM] = { + .mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK, + .reg_offset = 0, + }, + [TPS65910_IRQ_RTC_PERIOD] = { + .mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [TPS65910_IRQ_GPIO_R] = { + .mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK, + .reg_offset = 1, + }, + [TPS65910_IRQ_GPIO_F] = { + .mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK, + .reg_offset = 1, + }, +}; + +static struct regmap_irq_chip tps65911_irq_chip = { + .name = "tps65910", + .irqs = tps65911_irqs, + .num_irqs = ARRAY_SIZE(tps65911_irqs), + .num_regs = 3, + .irq_reg_stride = 2, + .status_base = TPS65910_INT_STS, + .mask_base = TPS65910_INT_MSK, + .ack_base = TPS65910_INT_MSK, +}; + +static struct regmap_irq_chip tps65910_irq_chip = { + .name = "tps65910", + .irqs = tps65910_irqs, + .num_irqs = ARRAY_SIZE(tps65910_irqs), + .num_regs = 2, + .irq_reg_stride = 2, + .status_base = TPS65910_INT_STS, + .mask_base = TPS65910_INT_MSK, + .ack_base = TPS65910_INT_MSK, +}; + +static int tps65910_irq_init(struct tps65910 *tps65910, int irq, + struct tps65910_platform_data *pdata) +{ + int ret = 0; + static struct regmap_irq_chip *tps6591x_irqs_chip; + + if (!irq) { + dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n"); + return -EINVAL; + } + + if (!pdata) { + dev_warn(tps65910->dev, "No interrupt support, no pdata\n"); + return -EINVAL; + } + + switch (tps65910_chip_id(tps65910)) { + case TPS65910: + tps6591x_irqs_chip = &tps65910_irq_chip; + break; + case TPS65911: + tps6591x_irqs_chip = &tps65911_irq_chip; + break; + } + + tps65910->chip_irq = irq; + ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq, + IRQF_ONESHOT, pdata->irq_base, + tps6591x_irqs_chip, &tps65910->irq_data); + if (ret < 0) + dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret); + return ret; +} + +static int tps65910_irq_exit(struct tps65910 *tps65910) +{ + if (tps65910->chip_irq > 0) + regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data); + return 0; +} + static bool is_volatile_reg(struct device *dev, unsigned int reg) { struct tps65910 *tps65910 = dev_get_drvdata(dev); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index b564ac29590a..0b16903f7e88 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -915,10 +915,6 @@ struct tps65910_platform_data { int irq_base; }; -int tps65910_irq_init(struct tps65910 *tps65910, int irq, - struct tps65910_platform_data *pdata); -int tps65910_irq_exit(struct tps65910 *tps65910); - static inline int tps65910_chip_id(struct tps65910 *tps65910) { return tps65910->id; -- cgit v1.2.3 From 8bad1abd6303476d6f77878aa8ea737d5d1b625c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Oct 2012 00:15:05 -0300 Subject: mfd: da9052: Introduce da9052-irq.c Create a da9052-irq.c file so that it can handle interrupt related functions. This is useful for allowing the da9052 drivers to use such functions when dealing with da9052 interrupts. Signed-off-by: Fabio Estevam Acked-by: Arnd Bergmann Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Makefile | 1 + drivers/mfd/da9052-core.c | 271 ++--------------------------------- drivers/mfd/da9052-irq.c | 288 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/da9052/da9052.h | 10 ++ 4 files changed, 307 insertions(+), 263 deletions(-) create mode 100644 drivers/mfd/da9052-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index a30c49ecdd95..632cce09e407 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -90,6 +90,7 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o +obj-$(CONFIG_PMIC_DA9052) += da9052-irq.o obj-$(CONFIG_PMIC_DA9052) += da9052-core.o obj-$(CONFIG_MFD_DA9052_SPI) += da9052-spi.o obj-$(CONFIG_MFD_DA9052_I2C) += da9052-i2c.o diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c index c96cdbc0daff..2153f9bba9ef 100644 --- a/drivers/mfd/da9052-core.c +++ b/drivers/mfd/da9052-core.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -24,16 +23,6 @@ #include #include -#define DA9052_NUM_IRQ_REGS 4 -#define DA9052_IRQ_MASK_POS_1 0x01 -#define DA9052_IRQ_MASK_POS_2 0x02 -#define DA9052_IRQ_MASK_POS_3 0x04 -#define DA9052_IRQ_MASK_POS_4 0x08 -#define DA9052_IRQ_MASK_POS_5 0x10 -#define DA9052_IRQ_MASK_POS_6 0x20 -#define DA9052_IRQ_MASK_POS_7 0x40 -#define DA9052_IRQ_MASK_POS_8 0x80 - static bool da9052_reg_readable(struct device *dev, unsigned int reg) { switch (reg) { @@ -425,15 +414,6 @@ err: } EXPORT_SYMBOL_GPL(da9052_adc_manual_read); -static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data) -{ - struct da9052 *da9052 = irq_data; - - complete(&da9052->done); - - return IRQ_HANDLED; -} - int da9052_adc_read_temp(struct da9052 *da9052) { int tbat; @@ -447,74 +427,6 @@ int da9052_adc_read_temp(struct da9052 *da9052) } EXPORT_SYMBOL_GPL(da9052_adc_read_temp); -static struct resource da9052_rtc_resource = { - .name = "ALM", - .start = DA9052_IRQ_ALARM, - .end = DA9052_IRQ_ALARM, - .flags = IORESOURCE_IRQ, -}; - -static struct resource da9052_onkey_resource = { - .name = "ONKEY", - .start = DA9052_IRQ_NONKEY, - .end = DA9052_IRQ_NONKEY, - .flags = IORESOURCE_IRQ, -}; - -static struct resource da9052_bat_resources[] = { - { - .name = "BATT TEMP", - .start = DA9052_IRQ_TBAT, - .end = DA9052_IRQ_TBAT, - .flags = IORESOURCE_IRQ, - }, - { - .name = "DCIN DET", - .start = DA9052_IRQ_DCIN, - .end = DA9052_IRQ_DCIN, - .flags = IORESOURCE_IRQ, - }, - { - .name = "DCIN REM", - .start = DA9052_IRQ_DCINREM, - .end = DA9052_IRQ_DCINREM, - .flags = IORESOURCE_IRQ, - }, - { - .name = "VBUS DET", - .start = DA9052_IRQ_VBUS, - .end = DA9052_IRQ_VBUS, - .flags = IORESOURCE_IRQ, - }, - { - .name = "VBUS REM", - .start = DA9052_IRQ_VBUSREM, - .end = DA9052_IRQ_VBUSREM, - .flags = IORESOURCE_IRQ, - }, - { - .name = "CHG END", - .start = DA9052_IRQ_CHGEND, - .end = DA9052_IRQ_CHGEND, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct resource da9052_tsi_resources[] = { - { - .name = "PENDWN", - .start = DA9052_IRQ_PENDOWN, - .end = DA9052_IRQ_PENDOWN, - .flags = IORESOURCE_IRQ, - }, - { - .name = "TSIRDY", - .start = DA9052_IRQ_TSIREADY, - .end = DA9052_IRQ_TSIREADY, - .flags = IORESOURCE_IRQ, - }, -}; - static struct mfd_cell __devinitdata da9052_subdev_info[] = { { .name = "da9052-regulator", @@ -574,13 +486,9 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = { }, { .name = "da9052-onkey", - .resources = &da9052_onkey_resource, - .num_resources = 1, }, { .name = "da9052-rtc", - .resources = &da9052_rtc_resource, - .num_resources = 1, }, { .name = "da9052-gpio", @@ -602,160 +510,15 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = { }, { .name = "da9052-tsi", - .resources = da9052_tsi_resources, - .num_resources = ARRAY_SIZE(da9052_tsi_resources), }, { .name = "da9052-bat", - .resources = da9052_bat_resources, - .num_resources = ARRAY_SIZE(da9052_bat_resources), }, { .name = "da9052-watchdog", }, }; -static struct regmap_irq da9052_irqs[] = { - [DA9052_IRQ_DCIN] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_1, - }, - [DA9052_IRQ_VBUS] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_2, - }, - [DA9052_IRQ_DCINREM] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_3, - }, - [DA9052_IRQ_VBUSREM] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_4, - }, - [DA9052_IRQ_VDDLOW] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_5, - }, - [DA9052_IRQ_ALARM] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_6, - }, - [DA9052_IRQ_SEQRDY] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_7, - }, - [DA9052_IRQ_COMP1V2] = { - .reg_offset = 0, - .mask = DA9052_IRQ_MASK_POS_8, - }, - [DA9052_IRQ_NONKEY] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_1, - }, - [DA9052_IRQ_IDFLOAT] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_2, - }, - [DA9052_IRQ_IDGND] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_3, - }, - [DA9052_IRQ_CHGEND] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_4, - }, - [DA9052_IRQ_TBAT] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_5, - }, - [DA9052_IRQ_ADC_EOM] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_6, - }, - [DA9052_IRQ_PENDOWN] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_7, - }, - [DA9052_IRQ_TSIREADY] = { - .reg_offset = 1, - .mask = DA9052_IRQ_MASK_POS_8, - }, - [DA9052_IRQ_GPI0] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_1, - }, - [DA9052_IRQ_GPI1] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_2, - }, - [DA9052_IRQ_GPI2] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_3, - }, - [DA9052_IRQ_GPI3] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_4, - }, - [DA9052_IRQ_GPI4] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_5, - }, - [DA9052_IRQ_GPI5] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_6, - }, - [DA9052_IRQ_GPI6] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_7, - }, - [DA9052_IRQ_GPI7] = { - .reg_offset = 2, - .mask = DA9052_IRQ_MASK_POS_8, - }, - [DA9052_IRQ_GPI8] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_1, - }, - [DA9052_IRQ_GPI9] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_2, - }, - [DA9052_IRQ_GPI10] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_3, - }, - [DA9052_IRQ_GPI11] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_4, - }, - [DA9052_IRQ_GPI12] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_5, - }, - [DA9052_IRQ_GPI13] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_6, - }, - [DA9052_IRQ_GPI14] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_7, - }, - [DA9052_IRQ_GPI15] = { - .reg_offset = 3, - .mask = DA9052_IRQ_MASK_POS_8, - }, -}; - -static struct regmap_irq_chip da9052_regmap_irq_chip = { - .name = "da9052_irq", - .status_base = DA9052_EVENT_A_REG, - .mask_base = DA9052_IRQ_MASK_A_REG, - .ack_base = DA9052_EVENT_A_REG, - .num_regs = DA9052_NUM_IRQ_REGS, - .irqs = da9052_irqs, - .num_irqs = ARRAY_SIZE(da9052_irqs), -}; - struct regmap_config da9052_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -769,15 +532,10 @@ struct regmap_config da9052_regmap_config = { }; EXPORT_SYMBOL_GPL(da9052_regmap_config); -static int da9052_map_irq(struct da9052 *da9052, int irq) -{ - return regmap_irq_get_virq(da9052->irq_data, irq); -} - int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id) { struct da9052_pdata *pdata = da9052->dev->platform_data; - int ret, i; + int ret; mutex_init(&da9052->auxadc_lock); init_completion(&da9052->done); @@ -787,22 +545,12 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id) da9052->chip_id = chip_id; - ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - -1, &da9052_regmap_irq_chip, - &da9052->irq_data); - if (ret < 0) { - dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret); - goto regmap_err; + ret = da9052_irq_init(da9052); + if (ret != 0) { + dev_err(da9052->dev, "da9052_irq_init failed: %d\n", ret); + return ret; } - i = da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM); - ret = request_threaded_irq(i, NULL, da9052_auxadc_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "adc-irq", da9052); - if (ret != 0) - dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret); - ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info, ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL); if (ret) { @@ -813,18 +561,15 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id) return 0; err: - free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052); - regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data); - mfd_remove_devices(da9052->dev); -regmap_err: + da9052_irq_exit(da9052); + return ret; } void da9052_device_exit(struct da9052 *da9052) { - free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052); - regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data); mfd_remove_devices(da9052->dev); + da9052_irq_exit(da9052); } MODULE_AUTHOR("David Dajun Chen "); diff --git a/drivers/mfd/da9052-irq.c b/drivers/mfd/da9052-irq.c new file mode 100644 index 000000000000..57ae7841f536 --- /dev/null +++ b/drivers/mfd/da9052-irq.c @@ -0,0 +1,288 @@ +/* + * DA9052 interrupt support + * + * Author: Fabio Estevam + * Based on arizona-irq.c, which is: + * + * Copyright 2012 Wolfson Microelectronics plc + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DA9052_NUM_IRQ_REGS 4 +#define DA9052_IRQ_MASK_POS_1 0x01 +#define DA9052_IRQ_MASK_POS_2 0x02 +#define DA9052_IRQ_MASK_POS_3 0x04 +#define DA9052_IRQ_MASK_POS_4 0x08 +#define DA9052_IRQ_MASK_POS_5 0x10 +#define DA9052_IRQ_MASK_POS_6 0x20 +#define DA9052_IRQ_MASK_POS_7 0x40 +#define DA9052_IRQ_MASK_POS_8 0x80 + +static struct regmap_irq da9052_irqs[] = { + [DA9052_IRQ_DCIN] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_1, + }, + [DA9052_IRQ_VBUS] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_2, + }, + [DA9052_IRQ_DCINREM] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_3, + }, + [DA9052_IRQ_VBUSREM] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_4, + }, + [DA9052_IRQ_VDDLOW] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_5, + }, + [DA9052_IRQ_ALARM] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_6, + }, + [DA9052_IRQ_SEQRDY] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_7, + }, + [DA9052_IRQ_COMP1V2] = { + .reg_offset = 0, + .mask = DA9052_IRQ_MASK_POS_8, + }, + [DA9052_IRQ_NONKEY] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_1, + }, + [DA9052_IRQ_IDFLOAT] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_2, + }, + [DA9052_IRQ_IDGND] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_3, + }, + [DA9052_IRQ_CHGEND] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_4, + }, + [DA9052_IRQ_TBAT] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_5, + }, + [DA9052_IRQ_ADC_EOM] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_6, + }, + [DA9052_IRQ_PENDOWN] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_7, + }, + [DA9052_IRQ_TSIREADY] = { + .reg_offset = 1, + .mask = DA9052_IRQ_MASK_POS_8, + }, + [DA9052_IRQ_GPI0] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_1, + }, + [DA9052_IRQ_GPI1] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_2, + }, + [DA9052_IRQ_GPI2] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_3, + }, + [DA9052_IRQ_GPI3] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_4, + }, + [DA9052_IRQ_GPI4] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_5, + }, + [DA9052_IRQ_GPI5] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_6, + }, + [DA9052_IRQ_GPI6] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_7, + }, + [DA9052_IRQ_GPI7] = { + .reg_offset = 2, + .mask = DA9052_IRQ_MASK_POS_8, + }, + [DA9052_IRQ_GPI8] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_1, + }, + [DA9052_IRQ_GPI9] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_2, + }, + [DA9052_IRQ_GPI10] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_3, + }, + [DA9052_IRQ_GPI11] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_4, + }, + [DA9052_IRQ_GPI12] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_5, + }, + [DA9052_IRQ_GPI13] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_6, + }, + [DA9052_IRQ_GPI14] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_7, + }, + [DA9052_IRQ_GPI15] = { + .reg_offset = 3, + .mask = DA9052_IRQ_MASK_POS_8, + }, +}; + +static struct regmap_irq_chip da9052_regmap_irq_chip = { + .name = "da9052_irq", + .status_base = DA9052_EVENT_A_REG, + .mask_base = DA9052_IRQ_MASK_A_REG, + .ack_base = DA9052_EVENT_A_REG, + .num_regs = DA9052_NUM_IRQ_REGS, + .irqs = da9052_irqs, + .num_irqs = ARRAY_SIZE(da9052_irqs), +}; + +static int da9052_map_irq(struct da9052 *da9052, int irq) +{ + return regmap_irq_get_virq(da9052->irq_data, irq); +} + +int da9052_enable_irq(struct da9052 *da9052, int irq) +{ + irq = da9052_map_irq(da9052, irq); + if (irq < 0) + return irq; + + enable_irq(irq); + + return 0; +} +EXPORT_SYMBOL_GPL(da9052_enable_irq); + +int da9052_disable_irq(struct da9052 *da9052, int irq) +{ + irq = da9052_map_irq(da9052, irq); + if (irq < 0) + return irq; + + disable_irq(irq); + + return 0; +} +EXPORT_SYMBOL_GPL(da9052_disable_irq); + +int da9052_disable_irq_nosync(struct da9052 *da9052, int irq) +{ + irq = da9052_map_irq(da9052, irq); + if (irq < 0) + return irq; + + disable_irq_nosync(irq); + + return 0; +} +EXPORT_SYMBOL_GPL(da9052_disable_irq_nosync); + +int da9052_request_irq(struct da9052 *da9052, int irq, char *name, + irq_handler_t handler, void *data) +{ + irq = da9052_map_irq(da9052, irq); + if (irq < 0) + return irq; + + return request_threaded_irq(irq, NULL, handler, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + name, data); +} +EXPORT_SYMBOL_GPL(da9052_request_irq); + +void da9052_free_irq(struct da9052 *da9052, int irq, void *data) +{ + irq = da9052_map_irq(da9052, irq); + if (irq < 0) + return; + + free_irq(irq, data); +} +EXPORT_SYMBOL_GPL(da9052_free_irq); + +static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data) +{ + struct da9052 *da9052 = irq_data; + + complete(&da9052->done); + + return IRQ_HANDLED; +} + +int da9052_irq_init(struct da9052 *da9052) +{ + int ret; + + ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + -1, &da9052_regmap_irq_chip, + &da9052->irq_data); + if (ret < 0) { + dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret); + goto regmap_err; + } + + ret = da9052_request_irq(da9052, DA9052_IRQ_ADC_EOM, "adc-irq", + da9052_auxadc_irq, da9052); + + if (ret != 0) { + dev_err(da9052->dev, "DA9052_IRQ_ADC_EOM failed: %d\n", ret); + goto request_irq_err; + } + + return 0; + +request_irq_err: + regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data); +regmap_err: + return ret; + +} + +int da9052_irq_exit(struct da9052 *da9052) +{ + da9052_free_irq(da9052, DA9052_IRQ_ADC_EOM , da9052); + regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data); + + return 0; +} diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 0507c4c21a7d..86dd93de6ff2 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -146,4 +146,14 @@ void da9052_device_exit(struct da9052 *da9052); extern struct regmap_config da9052_regmap_config; +int da9052_irq_init(struct da9052 *da9052); +int da9052_irq_exit(struct da9052 *da9052); +int da9052_request_irq(struct da9052 *da9052, int irq, char *name, + irq_handler_t handler, void *data); +void da9052_free_irq(struct da9052 *da9052, int irq, void *data); + +int da9052_enable_irq(struct da9052 *da9052, int irq); +int da9052_disable_irq(struct da9052 *da9052, int irq); +int da9052_disable_irq_nosync(struct da9052 *da9052, int irq); + #endif /* __MFD_DA9052_DA9052_H */ -- cgit v1.2.3 From f01312d846016dbd38cc9865e580298fb61f2aa7 Mon Sep 17 00:00:00 2001 From: Lars Poeschel Date: Mon, 5 Nov 2012 15:48:23 +0100 Subject: mfd: Add viperboard driver Add mfd driver for Nano River Technologies viperboard. Signed-off-by: Lars Poeschel Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 14 +++++ drivers/mfd/Makefile | 1 + drivers/mfd/viperboard.c | 129 +++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/viperboard.h | 105 +++++++++++++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 drivers/mfd/viperboard.c create mode 100644 include/linux/mfd/viperboard.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 34242cada125..b280639a7634 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1065,6 +1065,20 @@ config MFD_PALMAS If you say yes here you get support for the Palmas series of PMIC chips from Texas Instruments. +config MFD_VIPERBOARD + tristate "Support for Nano River Technologies Viperboard" + select MFD_CORE + depends on USB + default n + help + Say yes here if you want support for Nano River Technologies + Viperboard. + There are mfd cell drivers available for i2c master, adc and + both gpios found on the board. The spi part does not yet + have a driver. + You need to select the mfd cell drivers separately. + The drivers do not support all features the board exposes. + endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 632cce09e407..1c3ee7c76906 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -139,6 +139,7 @@ obj-$(CONFIG_MFD_TPS65090) += tps65090.o obj-$(CONFIG_MFD_AAT2870_CORE) += aat2870-core.o obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o obj-$(CONFIG_MFD_PALMAS) += palmas.o +obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c new file mode 100644 index 000000000000..cd1e2fe95647 --- /dev/null +++ b/drivers/mfd/viperboard.c @@ -0,0 +1,129 @@ +/* + * Nano River Technologies viperboard driver + * + * This is the core driver for the viperboard. There are cell drivers + * available for I2C, ADC and both GPIOs. SPI is not yet supported. + * The drivers do not support all features the board exposes. See user + * manual of the viperboard. + * + * (C) 2012 by Lemonage GmbH + * Author: Lars Poeschel + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +static const struct usb_device_id vprbrd_table[] = { + { USB_DEVICE(0x2058, 0x1005) }, /* Nano River Technologies */ + { } /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(usb, vprbrd_table); + +static struct mfd_cell vprbrd_devs[] = { +}; + +static int vprbrd_probe(struct usb_interface *interface, + const struct usb_device_id *id) +{ + struct vprbrd *vb; + + u16 version = 0; + int pipe, ret; + unsigned char buf[1]; + + /* allocate memory for our device state and initialize it */ + vb = kzalloc(sizeof(*vb), GFP_KERNEL); + if (vb == NULL) { + dev_err(&interface->dev, "Out of memory\n"); + return -ENOMEM; + } + + mutex_init(&vb->lock); + + vb->usb_dev = usb_get_dev(interface_to_usbdev(interface)); + + /* save our data pointer in this interface device */ + usb_set_intfdata(interface, vb); + dev_set_drvdata(&vb->pdev.dev, vb); + + /* get version information, major first, minor then */ + pipe = usb_rcvctrlpipe(vb->usb_dev, 0); + ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MAJOR, + VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1, + VPRBRD_USB_TIMEOUT_MS); + if (ret == 1) + version = buf[0]; + + ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MINOR, + VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1, + VPRBRD_USB_TIMEOUT_MS); + if (ret == 1) { + version <<= 8; + version = version | buf[0]; + } + + dev_info(&interface->dev, + "version %x.%02x found at bus %03d address %03d\n", + version >> 8, version & 0xff, + vb->usb_dev->bus->busnum, vb->usb_dev->devnum); + + ret = mfd_add_devices(&interface->dev, -1, vprbrd_devs, + ARRAY_SIZE(vprbrd_devs), NULL, 0, NULL); + if (ret != 0) { + dev_err(&interface->dev, "Failed to add mfd devices to core."); + goto error; + } + + return 0; + +error: + if (vb) { + usb_put_dev(vb->usb_dev); + kfree(vb); + } + + return ret; +} + +static void vprbrd_disconnect(struct usb_interface *interface) +{ + struct vprbrd *vb = usb_get_intfdata(interface); + + mfd_remove_devices(&interface->dev); + usb_set_intfdata(interface, NULL); + usb_put_dev(vb->usb_dev); + kfree(vb); + + dev_dbg(&interface->dev, "disconnected\n"); +} + +static struct usb_driver vprbrd_driver = { + .name = "viperboard", + .probe = vprbrd_probe, + .disconnect = vprbrd_disconnect, + .id_table = vprbrd_table, +}; + +module_usb_driver(vprbrd_driver); + +MODULE_DESCRIPTION("Nano River Technologies viperboard mfd core driver"); +MODULE_AUTHOR("Lars Poeschel "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h new file mode 100644 index 000000000000..0d1342466db5 --- /dev/null +++ b/include/linux/mfd/viperboard.h @@ -0,0 +1,105 @@ +/* + * include/linux/mfd/viperboard.h + * + * Nano River Technologies viperboard definitions + * + * (C) 2012 by Lemonage GmbH + * Author: Lars Poeschel + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_VIPERBOARD_H__ +#define __MFD_VIPERBOARD_H__ + +#include +#include + +#define VPRBRD_EP_OUT 0x02 +#define VPRBRD_EP_IN 0x86 + +#define VPRBRD_I2C_MSG_LEN 512 /* max length of a msg on USB level */ + +#define VPRBRD_I2C_FREQ_6MHZ 1 /* 6 MBit/s */ +#define VPRBRD_I2C_FREQ_3MHZ 2 /* 3 MBit/s */ +#define VPRBRD_I2C_FREQ_1MHZ 3 /* 1 MBit/s */ +#define VPRBRD_I2C_FREQ_FAST 4 /* 400 kbit/s */ +#define VPRBRD_I2C_FREQ_400KHZ VPRBRD_I2C_FREQ_FAST +#define VPRBRD_I2C_FREQ_200KHZ 5 /* 200 kbit/s */ +#define VPRBRD_I2C_FREQ_STD 6 /* 100 kbit/s */ +#define VPRBRD_I2C_FREQ_100KHZ VPRBRD_I2C_FREQ_STD +#define VPRBRD_I2C_FREQ_10KHZ 7 /* 10 kbit/s */ + +#define VPRBRD_I2C_CMD_WRITE 0x00 +#define VPRBRD_I2C_CMD_READ 0x01 +#define VPRBRD_I2C_CMD_ADDR 0x02 + +#define VPRBRD_USB_TYPE_OUT 0x40 +#define VPRBRD_USB_TYPE_IN 0xc0 +#define VPRBRD_USB_TIMEOUT_MS 100 +#define VPRBRD_USB_REQUEST_MAJOR 0xea +#define VPRBRD_USB_REQUEST_MINOR 0xeb + +struct vprbrd_i2c_write_hdr { + u8 cmd; + u16 addr; + u8 len1; + u8 len2; + u8 last; + u8 chan; + u16 spi; +} __packed; + +struct vprbrd_i2c_read_hdr { + u8 cmd; + u16 addr; + u8 len0; + u8 len1; + u8 len2; + u8 len3; + u8 len4; + u8 len5; + u16 tf1; /* transfer 1 length */ + u16 tf2; /* transfer 2 length */ +} __packed; + +struct vprbrd_i2c_status { + u8 unknown[11]; + u8 status; +} __packed; + +struct vprbrd_i2c_write_msg { + struct vprbrd_i2c_write_hdr header; + u8 data[VPRBRD_I2C_MSG_LEN + - sizeof(struct vprbrd_i2c_write_hdr)]; +} __packed; + +struct vprbrd_i2c_read_msg { + struct vprbrd_i2c_read_hdr header; + u8 data[VPRBRD_I2C_MSG_LEN + - sizeof(struct vprbrd_i2c_read_hdr)]; +} __packed; + +struct vprbrd_i2c_addr_msg { + u8 cmd; + u8 addr; + u8 unknown1; + u16 len; + u8 unknown2; + u8 unknown3; +} __packed; + +/* Structure to hold all device specific stuff */ +struct vprbrd { + struct usb_device *usb_dev; /* the usb device for this device */ + struct mutex lock; + u8 buf[sizeof(struct vprbrd_i2c_write_msg)]; + struct platform_device pdev; +}; + +#endif /* __MFD_VIPERBOARD_H__ */ -- cgit v1.2.3 From 9d5b72de0d1627b130fa69c5edf58b5b2df4ca50 Mon Sep 17 00:00:00 2001 From: Lars Poeschel Date: Mon, 5 Nov 2012 15:48:24 +0100 Subject: gpio: Add viperboard gpio driver This adds the mfd cell to use the gpio a and gpio b part of the Nano River Technologies viperboard. Signed-off-by: Lars Poeschel Reviewed-by: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/gpio/Kconfig | 13 ++ drivers/gpio/Makefile | 1 + drivers/gpio/gpio-viperboard.c | 517 +++++++++++++++++++++++++++++++++++++++++ drivers/mfd/viperboard.c | 3 + include/linux/mfd/viperboard.h | 2 + 5 files changed, 536 insertions(+) create mode 100644 drivers/gpio/gpio-viperboard.c (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d055cee36942..c3bbd08bcdc3 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -649,4 +649,17 @@ config GPIO_MSIC Enable support for GPIO on intel MSIC controllers found in intel MID devices +comment "USB GPIO expanders:" + +config GPIO_VIPERBOARD + tristate "Viperboard GPIO a & b support" + depends on MFD_VIPERBOARD && USB + help + Say yes here to access the GPIO signals of Nano River + Technologies Viperboard. There are two GPIO chips on the + board: gpioa and gpiob. + See viperboard API specification and Nano + River Tech's viperboard.h for detailed meaning + of the module parameters. + endif diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 9aeed6707326..16a1385226e9 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_GPIO_TPS65912) += gpio-tps65912.o obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o obj-$(CONFIG_GPIO_TWL6040) += gpio-twl6040.o obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o +obj-$(CONFIG_GPIO_VIPERBOARD) += gpio-viperboard.o obj-$(CONFIG_GPIO_VR41XX) += gpio-vr41xx.o obj-$(CONFIG_GPIO_VT8500) += gpio-vt8500.o obj-$(CONFIG_GPIO_VX855) += gpio-vx855.o diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c new file mode 100644 index 000000000000..13772996cf24 --- /dev/null +++ b/drivers/gpio/gpio-viperboard.c @@ -0,0 +1,517 @@ +/* + * Nano River Technologies viperboard GPIO lib driver + * + * (C) 2012 by Lemonage GmbH + * Author: Lars Poeschel + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define VPRBRD_GPIOA_CLK_1MHZ 0 +#define VPRBRD_GPIOA_CLK_100KHZ 1 +#define VPRBRD_GPIOA_CLK_10KHZ 2 +#define VPRBRD_GPIOA_CLK_1KHZ 3 +#define VPRBRD_GPIOA_CLK_100HZ 4 +#define VPRBRD_GPIOA_CLK_10HZ 5 + +#define VPRBRD_GPIOA_FREQ_DEFAULT 1000 + +#define VPRBRD_GPIOA_CMD_CONT 0x00 +#define VPRBRD_GPIOA_CMD_PULSE 0x01 +#define VPRBRD_GPIOA_CMD_PWM 0x02 +#define VPRBRD_GPIOA_CMD_SETOUT 0x03 +#define VPRBRD_GPIOA_CMD_SETIN 0x04 +#define VPRBRD_GPIOA_CMD_SETINT 0x05 +#define VPRBRD_GPIOA_CMD_GETIN 0x06 + +#define VPRBRD_GPIOB_CMD_SETDIR 0x00 +#define VPRBRD_GPIOB_CMD_SETVAL 0x01 + +struct vprbrd_gpioa_msg { + u8 cmd; + u8 clk; + u8 offset; + u8 t1; + u8 t2; + u8 invert; + u8 pwmlevel; + u8 outval; + u8 risefall; + u8 answer; + u8 __fill; +} __packed; + +struct vprbrd_gpiob_msg { + u8 cmd; + u16 val; + u16 mask; +} __packed; + +struct vprbrd_gpio { + struct gpio_chip gpioa; /* gpio a related things */ + u32 gpioa_out; + u32 gpioa_val; + struct gpio_chip gpiob; /* gpio b related things */ + u32 gpiob_out; + u32 gpiob_val; + struct vprbrd *vb; +}; + +/* gpioa sampling clock module parameter */ +static unsigned char gpioa_clk; +static unsigned int gpioa_freq = VPRBRD_GPIOA_FREQ_DEFAULT; +module_param(gpioa_freq, uint, 0); +MODULE_PARM_DESC(gpioa_freq, + "gpio-a sampling freq in Hz (default is 1000Hz) valid values: 10, 100, 1000, 10000, 100000, 1000000"); + +/* ----- begin of gipo a chip -------------------------------------------- */ + +static int vprbrd_gpioa_get(struct gpio_chip *chip, + unsigned offset) +{ + int ret, answer, error = 0; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpioa); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf; + + /* if io is set to output, just return the saved value */ + if (gpio->gpioa_out & (1 << offset)) + return gpio->gpioa_val & (1 << offset); + + mutex_lock(&vb->lock); + + gamsg->cmd = VPRBRD_GPIOA_CMD_GETIN; + gamsg->clk = 0x00; + gamsg->offset = offset; + gamsg->t1 = 0x00; + gamsg->t2 = 0x00; + gamsg->invert = 0x00; + gamsg->pwmlevel = 0x00; + gamsg->outval = 0x00; + gamsg->risefall = 0x00; + gamsg->answer = 0x00; + gamsg->__fill = 0x00; + + ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000, + 0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg), + VPRBRD_USB_TIMEOUT_MS); + if (ret != sizeof(struct vprbrd_gpioa_msg)) + error = -EREMOTEIO; + + ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_IN, 0x0000, + 0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg), + VPRBRD_USB_TIMEOUT_MS); + answer = gamsg->answer & 0x01; + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpioa_msg)) + error = -EREMOTEIO; + + if (error) + return error; + + return answer; +} + +static void vprbrd_gpioa_set(struct gpio_chip *chip, + unsigned offset, int value) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpioa); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf; + + if (gpio->gpioa_out & (1 << offset)) { + if (value) + gpio->gpioa_val |= (1 << offset); + else + gpio->gpioa_val &= ~(1 << offset); + + mutex_lock(&vb->lock); + + gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT; + gamsg->clk = 0x00; + gamsg->offset = offset; + gamsg->t1 = 0x00; + gamsg->t2 = 0x00; + gamsg->invert = 0x00; + gamsg->pwmlevel = 0x00; + gamsg->outval = value; + gamsg->risefall = 0x00; + gamsg->answer = 0x00; + gamsg->__fill = 0x00; + + ret = usb_control_msg(vb->usb_dev, + usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, + 0x0000, 0x0000, gamsg, + sizeof(struct vprbrd_gpioa_msg), VPRBRD_USB_TIMEOUT_MS); + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpioa_msg)) + dev_err(chip->dev, "usb error setting pin value\n"); + } +} + +static int vprbrd_gpioa_direction_input(struct gpio_chip *chip, + unsigned offset) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpioa); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf; + + gpio->gpioa_out &= ~(1 << offset); + + mutex_lock(&vb->lock); + + gamsg->cmd = VPRBRD_GPIOA_CMD_SETIN; + gamsg->clk = gpioa_clk; + gamsg->offset = offset; + gamsg->t1 = 0x00; + gamsg->t2 = 0x00; + gamsg->invert = 0x00; + gamsg->pwmlevel = 0x00; + gamsg->outval = 0x00; + gamsg->risefall = 0x00; + gamsg->answer = 0x00; + gamsg->__fill = 0x00; + + ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000, + 0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg), + VPRBRD_USB_TIMEOUT_MS); + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpioa_msg)) + return -EREMOTEIO; + + return 0; +} + +static int vprbrd_gpioa_direction_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpioa); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf; + + gpio->gpioa_out |= (1 << offset); + if (value) + gpio->gpioa_val |= (1 << offset); + else + gpio->gpioa_val &= ~(1 << offset); + + mutex_lock(&vb->lock); + + gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT; + gamsg->clk = 0x00; + gamsg->offset = offset; + gamsg->t1 = 0x00; + gamsg->t2 = 0x00; + gamsg->invert = 0x00; + gamsg->pwmlevel = 0x00; + gamsg->outval = value; + gamsg->risefall = 0x00; + gamsg->answer = 0x00; + gamsg->__fill = 0x00; + + ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000, + 0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg), + VPRBRD_USB_TIMEOUT_MS); + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpioa_msg)) + return -EREMOTEIO; + + return 0; +} + +/* ----- end of gpio a chip ---------------------------------------------- */ + +/* ----- begin of gipo b chip -------------------------------------------- */ + +static int vprbrd_gpiob_setdir(struct vprbrd *vb, unsigned offset, + unsigned dir) +{ + struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf; + int ret; + + gbmsg->cmd = VPRBRD_GPIOB_CMD_SETDIR; + gbmsg->val = cpu_to_be16(dir << offset); + gbmsg->mask = cpu_to_be16(0x0001 << offset); + + ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT, 0x0000, + 0x0000, gbmsg, sizeof(struct vprbrd_gpiob_msg), + VPRBRD_USB_TIMEOUT_MS); + + if (ret != sizeof(struct vprbrd_gpiob_msg)) + return -EREMOTEIO; + + return 0; +} + +static int vprbrd_gpiob_get(struct gpio_chip *chip, + unsigned offset) +{ + int ret; + u16 val; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpiob); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf; + + /* if io is set to output, just return the saved value */ + if (gpio->gpiob_out & (1 << offset)) + return gpio->gpiob_val & (1 << offset); + + mutex_lock(&vb->lock); + + ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_IN, 0x0000, + 0x0000, gbmsg, sizeof(struct vprbrd_gpiob_msg), + VPRBRD_USB_TIMEOUT_MS); + val = gbmsg->val; + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpiob_msg)) + return ret; + + /* cache the read values */ + gpio->gpiob_val = be16_to_cpu(val); + + return (gpio->gpiob_val >> offset) & 0x1; +} + +static void vprbrd_gpiob_set(struct gpio_chip *chip, + unsigned offset, int value) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpiob); + struct vprbrd *vb = gpio->vb; + struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf; + + if (gpio->gpiob_out & (1 << offset)) { + if (value) + gpio->gpiob_val |= (1 << offset); + else + gpio->gpiob_val &= ~(1 << offset); + + mutex_lock(&vb->lock); + + gbmsg->cmd = VPRBRD_GPIOB_CMD_SETVAL; + gbmsg->val = cpu_to_be16(value << offset); + gbmsg->mask = cpu_to_be16(0x0001 << offset); + + ret = usb_control_msg(vb->usb_dev, + usb_sndctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT, + 0x0000, 0x0000, gbmsg, + sizeof(struct vprbrd_gpiob_msg), VPRBRD_USB_TIMEOUT_MS); + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_gpiob_msg)) + dev_err(chip->dev, "usb error setting pin value\n"); + } +} + +static int vprbrd_gpiob_direction_input(struct gpio_chip *chip, + unsigned offset) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpiob); + struct vprbrd *vb = gpio->vb; + + gpio->gpiob_out &= ~(1 << offset); + + mutex_lock(&vb->lock); + + ret = vprbrd_gpiob_setdir(vb, offset, 0); + + mutex_unlock(&vb->lock); + + if (ret) + dev_err(chip->dev, "usb error setting pin to input\n"); + + return ret; +} + +static int vprbrd_gpiob_direction_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + int ret; + struct vprbrd_gpio *gpio = + container_of(chip, struct vprbrd_gpio, gpiob); + struct vprbrd *vb = gpio->vb; + + gpio->gpiob_out |= (1 << offset); + if (value) + gpio->gpiob_val |= (1 << offset); + else + gpio->gpiob_val &= ~(1 << offset); + + mutex_lock(&vb->lock); + + ret = vprbrd_gpiob_setdir(vb, offset, 1); + if (ret) + dev_err(chip->dev, "usb error setting pin to output\n"); + + mutex_unlock(&vb->lock); + + vprbrd_gpiob_set(chip, offset, value); + + return ret; +} + +/* ----- end of gpio b chip ---------------------------------------------- */ + +static int __devinit vprbrd_gpio_probe(struct platform_device *pdev) +{ + struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent); + struct vprbrd_gpio *vb_gpio; + int ret; + + vb_gpio = devm_kzalloc(&pdev->dev, sizeof(*vb_gpio), GFP_KERNEL); + if (vb_gpio == NULL) + return -ENOMEM; + + vb_gpio->vb = vb; + /* registering gpio a */ + vb_gpio->gpioa.label = "viperboard gpio a"; + vb_gpio->gpioa.dev = &pdev->dev; + vb_gpio->gpioa.owner = THIS_MODULE; + vb_gpio->gpioa.base = -1; + vb_gpio->gpioa.ngpio = 16; + vb_gpio->gpioa.can_sleep = 1; + vb_gpio->gpioa.set = vprbrd_gpioa_set; + vb_gpio->gpioa.get = vprbrd_gpioa_get; + vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input; + vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output; + ret = gpiochip_add(&vb_gpio->gpioa); + if (ret < 0) { + dev_err(vb_gpio->gpioa.dev, "could not add gpio a"); + goto err_gpioa; + } + + /* registering gpio b */ + vb_gpio->gpiob.label = "viperboard gpio b"; + vb_gpio->gpiob.dev = &pdev->dev; + vb_gpio->gpiob.owner = THIS_MODULE; + vb_gpio->gpiob.base = -1; + vb_gpio->gpiob.ngpio = 16; + vb_gpio->gpiob.can_sleep = 1; + vb_gpio->gpiob.set = vprbrd_gpiob_set; + vb_gpio->gpiob.get = vprbrd_gpiob_get; + vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input; + vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output; + ret = gpiochip_add(&vb_gpio->gpiob); + if (ret < 0) { + dev_err(vb_gpio->gpiob.dev, "could not add gpio b"); + goto err_gpiob; + } + + platform_set_drvdata(pdev, vb_gpio); + + return ret; + +err_gpiob: + ret = gpiochip_remove(&vb_gpio->gpioa); + +err_gpioa: + return ret; +} + +static int __devexit vprbrd_gpio_remove(struct platform_device *pdev) +{ + struct vprbrd_gpio *vb_gpio = platform_get_drvdata(pdev); + int ret; + + ret = gpiochip_remove(&vb_gpio->gpiob); + if (ret == 0) + ret = gpiochip_remove(&vb_gpio->gpioa); + + return ret; +} + +static struct platform_driver vprbrd_gpio_driver = { + .driver.name = "viperboard-gpio", + .driver.owner = THIS_MODULE, + .probe = vprbrd_gpio_probe, + .remove = __devexit_p(vprbrd_gpio_remove), +}; + +static int __init vprbrd_gpio_init(void) +{ + switch (gpioa_freq) { + case 1000000: + gpioa_clk = VPRBRD_GPIOA_CLK_1MHZ; + break; + case 100000: + gpioa_clk = VPRBRD_GPIOA_CLK_100KHZ; + break; + case 10000: + gpioa_clk = VPRBRD_GPIOA_CLK_10KHZ; + break; + case 1000: + gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ; + break; + case 100: + gpioa_clk = VPRBRD_GPIOA_CLK_100HZ; + break; + case 10: + gpioa_clk = VPRBRD_GPIOA_CLK_10HZ; + break; + default: + pr_warn("invalid gpioa_freq (%d)\n", gpioa_freq); + gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ; + } + + return platform_driver_register(&vprbrd_gpio_driver); +} +subsys_initcall(vprbrd_gpio_init); + +static void __exit vprbrd_gpio_exit(void) +{ + platform_driver_unregister(&vprbrd_gpio_driver); +} +module_exit(vprbrd_gpio_exit); + +MODULE_AUTHOR("Lars Poeschel "); +MODULE_DESCRIPTION("GPIO driver for Nano River Techs Viperboard"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:viperboard-gpio"); diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c index cd1e2fe95647..c75ac08f639c 100644 --- a/drivers/mfd/viperboard.c +++ b/drivers/mfd/viperboard.c @@ -38,6 +38,9 @@ static const struct usb_device_id vprbrd_table[] = { MODULE_DEVICE_TABLE(usb, vprbrd_table); static struct mfd_cell vprbrd_devs[] = { + { + .name = "viperboard-gpio", + }, }; static int vprbrd_probe(struct usb_interface *interface, diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h index 0d1342466db5..42d339fac3e5 100644 --- a/include/linux/mfd/viperboard.h +++ b/include/linux/mfd/viperboard.h @@ -44,6 +44,8 @@ #define VPRBRD_USB_TIMEOUT_MS 100 #define VPRBRD_USB_REQUEST_MAJOR 0xea #define VPRBRD_USB_REQUEST_MINOR 0xeb +#define VPRBRD_USB_REQUEST_GPIOA 0xed +#define VPRBRD_USB_REQUEST_GPIOB 0xdd struct vprbrd_i2c_write_hdr { u8 cmd; -- cgit v1.2.3 From 174a13aa8669331605b138aaf61569dd7189e453 Mon Sep 17 00:00:00 2001 From: Lars Poeschel Date: Mon, 19 Nov 2012 16:36:04 +0100 Subject: i2c: Add viperboard i2c master driver This adds the mfd cell to use the i2c part of the Nano River Technologies viperboard as i2c master. Signed-off-by: Lars Poeschel Signed-off-by: Samuel Ortiz --- drivers/i2c/busses/Kconfig | 10 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-viperboard.c | 480 ++++++++++++++++++++++++++++++++++++ drivers/mfd/viperboard.c | 3 + include/linux/mfd/viperboard.h | 2 + 5 files changed, 496 insertions(+) create mode 100644 drivers/i2c/busses/i2c-viperboard.c (limited to 'include/linux') diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e9df4612b7eb..c7bff51fe524 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -818,6 +818,16 @@ config I2C_TINY_USB This driver can also be built as a module. If so, the module will be called i2c-tiny-usb. +config I2C_VIPERBOARD + tristate "Viperboard I2C master support" + depends on MFD_VIPERBOARD && USB + help + Say yes here to access the I2C part of the Nano River + Technologies Viperboard as I2C master. + See viperboard API specification and Nano + River Tech's viperboard.h for detailed meaning + of the module parameters. + comment "Other I2C/SMBus bus drivers" config I2C_ACORN diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 395b516ffa08..e5cb209d276c 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_I2C_PARPORT) += i2c-parport.o obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o +obj-$(CONFIG_I2C_VIPERBOARD) += i2c-viperboard.o # Other I2C/SMBus bus drivers obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c new file mode 100644 index 000000000000..f5fa20dea906 --- /dev/null +++ b/drivers/i2c/busses/i2c-viperboard.c @@ -0,0 +1,480 @@ +/* + * Nano River Technologies viperboard i2c master driver + * + * (C) 2012 by Lemonage GmbH + * Author: Lars Poeschel + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +struct vprbrd_i2c { + struct i2c_adapter i2c; + u8 bus_freq_param; +}; + +/* i2c bus frequency module parameter */ +static u8 i2c_bus_param; +static unsigned int i2c_bus_freq = 100; +module_param(i2c_bus_freq, int, 0); +MODULE_PARM_DESC(i2c_bus_freq, + "i2c bus frequency in khz (default is 100) valid values: 10, 100, 200, 400, 1000, 3000, 6000"); + +static int vprbrd_i2c_status(struct i2c_adapter *i2c, + struct vprbrd_i2c_status *status, bool prev_error) +{ + u16 bytes_xfer; + int ret; + struct vprbrd *vb = (struct vprbrd *)i2c->algo_data; + + /* check for protocol error */ + bytes_xfer = sizeof(struct vprbrd_i2c_status); + + ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0), + VPRBRD_USB_REQUEST_I2C, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, + status, bytes_xfer, VPRBRD_USB_TIMEOUT_MS); + + if (ret != bytes_xfer) + prev_error = true; + + if (prev_error) { + dev_err(&i2c->dev, "failure in usb communication\n"); + return -EREMOTEIO; + } + + dev_dbg(&i2c->dev, " status = %d\n", status->status); + if (status->status != 0x00) { + dev_err(&i2c->dev, "failure: i2c protocol error\n"); + return -EPROTO; + } + return 0; +} + +static int vprbrd_i2c_receive(struct usb_device *usb_dev, + struct vprbrd_i2c_read_msg *rmsg, int bytes_xfer) +{ + int ret, bytes_actual; + int error = 0; + + /* send the read request */ + ret = usb_bulk_msg(usb_dev, + usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), rmsg, + sizeof(struct vprbrd_i2c_read_hdr), &bytes_actual, + VPRBRD_USB_TIMEOUT_MS); + + if ((ret < 0) + || (bytes_actual != sizeof(struct vprbrd_i2c_read_hdr))) { + dev_err(&usb_dev->dev, "failure transmitting usb\n"); + error = -EREMOTEIO; + } + + /* read the actual data */ + ret = usb_bulk_msg(usb_dev, + usb_rcvbulkpipe(usb_dev, VPRBRD_EP_IN), rmsg, + bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS); + + if ((ret < 0) || (bytes_xfer != bytes_actual)) { + dev_err(&usb_dev->dev, "failure receiving usb\n"); + error = -EREMOTEIO; + } + return error; +} + +static int vprbrd_i2c_addr(struct usb_device *usb_dev, + struct vprbrd_i2c_addr_msg *amsg) +{ + int ret, bytes_actual; + + ret = usb_bulk_msg(usb_dev, + usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), amsg, + sizeof(struct vprbrd_i2c_addr_msg), &bytes_actual, + VPRBRD_USB_TIMEOUT_MS); + + if ((ret < 0) || + (sizeof(struct vprbrd_i2c_addr_msg) != bytes_actual)) { + dev_err(&usb_dev->dev, "failure transmitting usb\n"); + return -EREMOTEIO; + } + return 0; +} + +static int vprbrd_i2c_read(struct vprbrd *vb, struct i2c_msg *msg) +{ + int ret; + u16 remain_len, bytes_xfer, len1, len2, + start = 0x0000; + struct vprbrd_i2c_read_msg *rmsg = + (struct vprbrd_i2c_read_msg *)vb->buf; + + remain_len = msg->len; + rmsg->header.cmd = VPRBRD_I2C_CMD_READ; + while (remain_len > 0) { + rmsg->header.addr = cpu_to_le16(start + 0x4000); + if (remain_len <= 255) { + len1 = remain_len; + len2 = 0x00; + rmsg->header.len0 = remain_len; + rmsg->header.len1 = 0x00; + rmsg->header.len2 = 0x00; + rmsg->header.len3 = 0x00; + rmsg->header.len4 = 0x00; + rmsg->header.len5 = 0x00; + remain_len = 0; + } else if (remain_len <= 510) { + len1 = remain_len; + len2 = 0x00; + rmsg->header.len0 = remain_len - 255; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0x00; + rmsg->header.len3 = 0x00; + rmsg->header.len4 = 0x00; + rmsg->header.len5 = 0x00; + remain_len = 0; + } else if (remain_len <= 512) { + len1 = remain_len; + len2 = 0x00; + rmsg->header.len0 = remain_len - 510; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0xff; + rmsg->header.len3 = 0x00; + rmsg->header.len4 = 0x00; + rmsg->header.len5 = 0x00; + remain_len = 0; + } else if (remain_len <= 767) { + len1 = 512; + len2 = remain_len - 512; + rmsg->header.len0 = 0x02; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0xff; + rmsg->header.len3 = remain_len - 512; + rmsg->header.len4 = 0x00; + rmsg->header.len5 = 0x00; + bytes_xfer = remain_len; + remain_len = 0; + } else if (remain_len <= 1022) { + len1 = 512; + len2 = remain_len - 512; + rmsg->header.len0 = 0x02; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0xff; + rmsg->header.len3 = remain_len - 767; + rmsg->header.len4 = 0xff; + rmsg->header.len5 = 0x00; + remain_len = 0; + } else if (remain_len <= 1024) { + len1 = 512; + len2 = remain_len - 512; + rmsg->header.len0 = 0x02; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0xff; + rmsg->header.len3 = remain_len - 1022; + rmsg->header.len4 = 0xff; + rmsg->header.len5 = 0xff; + remain_len = 0; + } else { + len1 = 512; + len2 = 512; + rmsg->header.len0 = 0x02; + rmsg->header.len1 = 0xff; + rmsg->header.len2 = 0xff; + rmsg->header.len3 = 0x02; + rmsg->header.len4 = 0xff; + rmsg->header.len5 = 0xff; + remain_len -= 1024; + start += 1024; + } + rmsg->header.tf1 = cpu_to_le16(len1); + rmsg->header.tf2 = cpu_to_le16(len2); + + /* first read transfer */ + ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len1); + if (ret < 0) + return ret; + /* copy the received data */ + memcpy(msg->buf + start, rmsg, len1); + + /* second read transfer if neccessary */ + if (len2 > 0) { + ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len2); + if (ret < 0) + return ret; + /* copy the received data */ + memcpy(msg->buf + start + 512, rmsg, len2); + } + } + return 0; +} + +static int vprbrd_i2c_write(struct vprbrd *vb, struct i2c_msg *msg) +{ + int ret, bytes_actual; + u16 remain_len, bytes_xfer, + start = 0x0000; + struct vprbrd_i2c_write_msg *wmsg = + (struct vprbrd_i2c_write_msg *)vb->buf; + + remain_len = msg->len; + wmsg->header.cmd = VPRBRD_I2C_CMD_WRITE; + wmsg->header.last = 0x00; + wmsg->header.chan = 0x00; + wmsg->header.spi = 0x0000; + while (remain_len > 0) { + wmsg->header.addr = cpu_to_le16(start + 0x4000); + if (remain_len > 503) { + wmsg->header.len1 = 0xff; + wmsg->header.len2 = 0xf8; + remain_len -= 503; + bytes_xfer = 503 + sizeof(struct vprbrd_i2c_write_hdr); + start += 503; + } else if (remain_len > 255) { + wmsg->header.len1 = 0xff; + wmsg->header.len2 = (remain_len - 255); + bytes_xfer = remain_len + + sizeof(struct vprbrd_i2c_write_hdr); + remain_len = 0; + } else { + wmsg->header.len1 = remain_len; + wmsg->header.len2 = 0x00; + bytes_xfer = remain_len + + sizeof(struct vprbrd_i2c_write_hdr); + remain_len = 0; + } + memcpy(wmsg->data, msg->buf + start, + bytes_xfer - sizeof(struct vprbrd_i2c_write_hdr)); + + ret = usb_bulk_msg(vb->usb_dev, + usb_sndbulkpipe(vb->usb_dev, + VPRBRD_EP_OUT), wmsg, + bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS); + if ((ret < 0) || (bytes_xfer != bytes_actual)) + return -EREMOTEIO; + } + return 0; +} + +static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs, + int num) +{ + struct i2c_msg *pmsg; + int i, ret, + error = 0; + struct vprbrd *vb = (struct vprbrd *)i2c->algo_data; + struct vprbrd_i2c_addr_msg *amsg = + (struct vprbrd_i2c_addr_msg *)vb->buf; + struct vprbrd_i2c_status *smsg = (struct vprbrd_i2c_status *)vb->buf; + + dev_dbg(&i2c->dev, "master xfer %d messages:\n", num); + + for (i = 0 ; i < num ; i++) { + pmsg = &msgs[i]; + + dev_dbg(&i2c->dev, + " %d: %s (flags %d) %d bytes to 0x%02x\n", + i, pmsg->flags & I2C_M_RD ? "read" : "write", + pmsg->flags, pmsg->len, pmsg->addr); + + /* msgs longer than 2048 bytes are not supported by adapter */ + if (pmsg->len > 2048) + return -EINVAL; + + mutex_lock(&vb->lock); + /* directly send the message */ + if (pmsg->flags & I2C_M_RD) { + /* read data */ + amsg->cmd = VPRBRD_I2C_CMD_ADDR; + amsg->unknown2 = 0x00; + amsg->unknown3 = 0x00; + amsg->addr = pmsg->addr; + amsg->unknown1 = 0x01; + amsg->len = cpu_to_le16(pmsg->len); + /* send the addr and len, we're interested to board */ + ret = vprbrd_i2c_addr(vb->usb_dev, amsg); + if (ret < 0) + error = ret; + + ret = vprbrd_i2c_read(vb, pmsg); + if (ret < 0) + error = ret; + + ret = vprbrd_i2c_status(i2c, smsg, error); + if (ret < 0) + error = ret; + /* in case of protocol error, return the error */ + if (error < 0) + goto error; + } else { + /* write data */ + ret = vprbrd_i2c_write(vb, pmsg); + + amsg->cmd = VPRBRD_I2C_CMD_ADDR; + amsg->unknown2 = 0x00; + amsg->unknown3 = 0x00; + amsg->addr = pmsg->addr; + amsg->unknown1 = 0x00; + amsg->len = cpu_to_le16(pmsg->len); + /* send the addr, the data goes to to board */ + ret = vprbrd_i2c_addr(vb->usb_dev, amsg); + if (ret < 0) + error = ret; + + ret = vprbrd_i2c_status(i2c, smsg, error); + if (ret < 0) + error = ret; + + if (error < 0) + goto error; + } + mutex_unlock(&vb->lock); + } + return 0; +error: + mutex_unlock(&vb->lock); + return error; +} + +static u32 vprbrd_i2c_func(struct i2c_adapter *i2c) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +/* This is the actual algorithm we define */ +static const struct i2c_algorithm vprbrd_algorithm = { + .master_xfer = vprbrd_i2c_xfer, + .functionality = vprbrd_i2c_func, +}; + +static int __devinit vprbrd_i2c_probe(struct platform_device *pdev) +{ + struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent); + struct vprbrd_i2c *vb_i2c; + int ret; + int pipe; + + vb_i2c = kzalloc(sizeof(*vb_i2c), GFP_KERNEL); + if (vb_i2c == NULL) + return -ENOMEM; + + /* setup i2c adapter description */ + vb_i2c->i2c.owner = THIS_MODULE; + vb_i2c->i2c.class = I2C_CLASS_HWMON; + vb_i2c->i2c.algo = &vprbrd_algorithm; + vb_i2c->i2c.algo_data = vb; + /* save the param in usb capabable memory */ + vb_i2c->bus_freq_param = i2c_bus_param; + + snprintf(vb_i2c->i2c.name, sizeof(vb_i2c->i2c.name), + "viperboard at bus %03d device %03d", + vb->usb_dev->bus->busnum, vb->usb_dev->devnum); + + /* setting the bus frequency */ + if ((i2c_bus_param <= VPRBRD_I2C_FREQ_10KHZ) + && (i2c_bus_param >= VPRBRD_I2C_FREQ_6MHZ)) { + pipe = usb_sndctrlpipe(vb->usb_dev, 0); + ret = usb_control_msg(vb->usb_dev, pipe, + VPRBRD_USB_REQUEST_I2C_FREQ, VPRBRD_USB_TYPE_OUT, + 0x0000, 0x0000, &vb_i2c->bus_freq_param, 1, + VPRBRD_USB_TIMEOUT_MS); + if (ret != 1) { + dev_err(&pdev->dev, + "failure setting i2c_bus_freq to %d\n", i2c_bus_freq); + ret = -EIO; + goto error; + } + } else { + dev_err(&pdev->dev, + "invalid i2c_bus_freq setting:%d\n", i2c_bus_freq); + ret = -EIO; + goto error; + } + + vb_i2c->i2c.dev.parent = &pdev->dev; + + /* attach to i2c layer */ + i2c_add_adapter(&vb_i2c->i2c); + + platform_set_drvdata(pdev, vb_i2c); + + return 0; + +error: + kfree(vb_i2c); + return ret; +} + +static int __devexit vprbrd_i2c_remove(struct platform_device *pdev) +{ + struct vprbrd_i2c *vb_i2c = platform_get_drvdata(pdev); + int ret; + + ret = i2c_del_adapter(&vb_i2c->i2c); + + return ret; +} + +static struct platform_driver vprbrd_i2c_driver = { + .driver.name = "viperboard-i2c", + .driver.owner = THIS_MODULE, + .probe = vprbrd_i2c_probe, + .remove = __devexit_p(vprbrd_i2c_remove), +}; + +static int __init vprbrd_i2c_init(void) +{ + switch (i2c_bus_freq) { + case 6000: + i2c_bus_param = VPRBRD_I2C_FREQ_6MHZ; + break; + case 3000: + i2c_bus_param = VPRBRD_I2C_FREQ_3MHZ; + break; + case 1000: + i2c_bus_param = VPRBRD_I2C_FREQ_1MHZ; + break; + case 400: + i2c_bus_param = VPRBRD_I2C_FREQ_400KHZ; + break; + case 200: + i2c_bus_param = VPRBRD_I2C_FREQ_200KHZ; + break; + case 100: + i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ; + break; + case 10: + i2c_bus_param = VPRBRD_I2C_FREQ_10KHZ; + break; + default: + pr_warn("invalid i2c_bus_freq (%d)\n", i2c_bus_freq); + i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ; + } + + return platform_driver_register(&vprbrd_i2c_driver); +} +subsys_initcall(vprbrd_i2c_init); + +static void __exit vprbrd_i2c_exit(void) +{ + platform_driver_unregister(&vprbrd_i2c_driver); +} +module_exit(vprbrd_i2c_exit); + +MODULE_AUTHOR("Lars Poeschel "); +MODULE_DESCRIPTION("I2C master driver for Nano River Techs Viperboard"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:viperboard-i2c"); diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c index c75ac08f639c..4f74ec868a52 100644 --- a/drivers/mfd/viperboard.c +++ b/drivers/mfd/viperboard.c @@ -41,6 +41,9 @@ static struct mfd_cell vprbrd_devs[] = { { .name = "viperboard-gpio", }, + { + .name = "viperboard-i2c", + }, }; static int vprbrd_probe(struct usb_interface *interface, diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h index 42d339fac3e5..ef7851430d86 100644 --- a/include/linux/mfd/viperboard.h +++ b/include/linux/mfd/viperboard.h @@ -42,6 +42,8 @@ #define VPRBRD_USB_TYPE_OUT 0x40 #define VPRBRD_USB_TYPE_IN 0xc0 #define VPRBRD_USB_TIMEOUT_MS 100 +#define VPRBRD_USB_REQUEST_I2C_FREQ 0xe6 +#define VPRBRD_USB_REQUEST_I2C 0xe9 #define VPRBRD_USB_REQUEST_MAJOR 0xea #define VPRBRD_USB_REQUEST_MINOR 0xeb #define VPRBRD_USB_REQUEST_GPIOA 0xed -- cgit v1.2.3 From ffd8a6e7a77802dd09559be78dea63956aa8f1d5 Mon Sep 17 00:00:00 2001 From: Lars Poeschel Date: Mon, 5 Nov 2012 15:48:26 +0100 Subject: iio: adc: Add viperboard adc driver This adds the mfd cell to use the adc part of the Nano River Technologies viperboard. Signed-off-by: Lars Poeschel Reviewed-by: Lars-Peter Clausen Acked-by: Jonathan Cameron Signed-off-by: Samuel Ortiz --- drivers/iio/adc/Kconfig | 7 ++ drivers/iio/adc/Makefile | 1 + drivers/iio/adc/viperboard_adc.c | 181 +++++++++++++++++++++++++++++++++++++++ drivers/mfd/viperboard.c | 3 + include/linux/mfd/viperboard.h | 1 + 5 files changed, 193 insertions(+) create mode 100644 drivers/iio/adc/viperboard_adc.c (limited to 'include/linux') diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 1401ed1af39f..bb3053739c80 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -67,4 +67,11 @@ config TI_AM335X_ADC Say yes here to build support for Texas Instruments ADC driver which is also a MFD client. +config VIPERBOARD_ADC + tristate "Viperboard ADC support" + depends on MFD_VIPERBOARD && USB + help + Say yes here to access the ADC part of the Nano River + Technologies Viperboard. + endmenu diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 4410a90fc84b..4268fa987fe6 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_AD7791) += ad7791.o obj-$(CONFIG_AT91_ADC) += at91_adc.o obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o +obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o \ No newline at end of file diff --git a/drivers/iio/adc/viperboard_adc.c b/drivers/iio/adc/viperboard_adc.c new file mode 100644 index 000000000000..10136a8b20d4 --- /dev/null +++ b/drivers/iio/adc/viperboard_adc.c @@ -0,0 +1,181 @@ +/* + * Nano River Technologies viperboard IIO ADC driver + * + * (C) 2012 by Lemonage GmbH + * Author: Lars Poeschel + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define VPRBRD_ADC_CMD_GET 0x00 + +struct vprbrd_adc_msg { + u8 cmd; + u8 chan; + u8 val; +} __packed; + +struct vprbrd_adc { + struct vprbrd *vb; +}; + +#define VPRBRD_ADC_CHANNEL(_index) { \ + .type = IIO_VOLTAGE, \ + .indexed = 1, \ + .channel = _index, \ + .info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT, \ + .scan_index = _index, \ + .scan_type = { \ + .sign = 'u', \ + .realbits = 8, \ + .storagebits = 8, \ + }, \ +} + +static struct iio_chan_spec const vprbrd_adc_iio_channels[] = { + VPRBRD_ADC_CHANNEL(0), + VPRBRD_ADC_CHANNEL(1), + VPRBRD_ADC_CHANNEL(2), + VPRBRD_ADC_CHANNEL(3), +}; + +static int vprbrd_iio_read_raw(struct iio_dev *iio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long info) +{ + int ret, error = 0; + struct vprbrd_adc *adc = iio_priv(iio_dev); + struct vprbrd *vb = adc->vb; + struct vprbrd_adc_msg *admsg = (struct vprbrd_adc_msg *)vb->buf; + + switch (info) { + case IIO_CHAN_INFO_RAW: + mutex_lock(&vb->lock); + + admsg->cmd = VPRBRD_ADC_CMD_GET; + admsg->chan = chan->scan_index; + admsg->val = 0x00; + + ret = usb_control_msg(vb->usb_dev, + usb_sndctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC, + VPRBRD_USB_TYPE_OUT, 0x0000, 0x0000, admsg, + sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS); + if (ret != sizeof(struct vprbrd_adc_msg)) { + dev_err(&iio_dev->dev, "usb send error on adc read\n"); + error = -EREMOTEIO; + } + + ret = usb_control_msg(vb->usb_dev, + usb_rcvctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC, + VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, admsg, + sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS); + + *val = admsg->val; + + mutex_unlock(&vb->lock); + + if (ret != sizeof(struct vprbrd_adc_msg)) { + dev_err(&iio_dev->dev, "usb recv error on adc read\n"); + error = -EREMOTEIO; + } + + if (error) + goto error; + + return IIO_VAL_INT; + default: + error = -EINVAL; + break; + } +error: + return error; +} + +static const struct iio_info vprbrd_adc_iio_info = { + .read_raw = &vprbrd_iio_read_raw, + .driver_module = THIS_MODULE, +}; + +static int __devinit vprbrd_adc_probe(struct platform_device *pdev) +{ + struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent); + struct vprbrd_adc *adc; + struct iio_dev *indio_dev; + int ret; + + /* registering iio */ + indio_dev = iio_device_alloc(sizeof(*adc)); + if (!indio_dev) { + dev_err(&pdev->dev, "failed allocating iio device\n"); + return -ENOMEM; + } + + adc = iio_priv(indio_dev); + adc->vb = vb; + indio_dev->name = "viperboard adc"; + indio_dev->dev.parent = &pdev->dev; + indio_dev->info = &vprbrd_adc_iio_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = vprbrd_adc_iio_channels; + indio_dev->num_channels = ARRAY_SIZE(vprbrd_adc_iio_channels); + + ret = iio_device_register(indio_dev); + if (ret) { + dev_err(&pdev->dev, "could not register iio (adc)"); + goto error; + } + + platform_set_drvdata(pdev, indio_dev); + + return 0; + +error: + iio_device_free(indio_dev); + return ret; +} + +static int __devexit vprbrd_adc_remove(struct platform_device *pdev) +{ + struct iio_dev *indio_dev = platform_get_drvdata(pdev); + + iio_device_unregister(indio_dev); + iio_device_free(indio_dev); + + return 0; +} + +static struct platform_driver vprbrd_adc_driver = { + .driver = { + .name = "viperboard-adc", + .owner = THIS_MODULE, + }, + .probe = vprbrd_adc_probe, + .remove = __devexit_p(vprbrd_adc_remove), +}; + +module_platform_driver(vprbrd_adc_driver); + +MODULE_AUTHOR("Lars Poeschel "); +MODULE_DESCRIPTION("IIO ADC driver for Nano River Techs Viperboard"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:viperboard-adc"); diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c index 4f74ec868a52..276122b37199 100644 --- a/drivers/mfd/viperboard.c +++ b/drivers/mfd/viperboard.c @@ -44,6 +44,9 @@ static struct mfd_cell vprbrd_devs[] = { { .name = "viperboard-i2c", }, + { + .name = "viperboard-adc", + }, }; static int vprbrd_probe(struct usb_interface *interface, diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h index ef7851430d86..193452848c04 100644 --- a/include/linux/mfd/viperboard.h +++ b/include/linux/mfd/viperboard.h @@ -46,6 +46,7 @@ #define VPRBRD_USB_REQUEST_I2C 0xe9 #define VPRBRD_USB_REQUEST_MAJOR 0xea #define VPRBRD_USB_REQUEST_MINOR 0xeb +#define VPRBRD_USB_REQUEST_ADC 0xec #define VPRBRD_USB_REQUEST_GPIOA 0xed #define VPRBRD_USB_REQUEST_GPIOB 0xdd -- cgit v1.2.3 From 1950c7164646bfeeb82c34bc299d82119706afb5 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:19:52 +0100 Subject: mfd: sta2x11-mfd: Add apb-soc regs driver and factor out common code A driver for the apb-soc registers is needed by the clock infrastructure code to configure and control clocks on the sta2x11 chip. Since some of the functions in sta2x11-mfd.c were almost identical for the two existing platform devices, the following changes have been performed to avoid further code duplication while adding the apb-soc-regs driver: * The sctl_regs and apbreg_regs fields in struct sta2x11_mfd have been turned into just one array of pointers accessed by device index. * Platform probe methods have become one-liners invoking a common probe with the device's index as second parameter. * For loops have been inserted where the same operations were performed for each of the two bars of a pci device. * The apbreg_mask and sctl_mask functions were almost identical, so they were turned into inline functions invoking a common __sta2x11_mfd_mask() with the platform device's index as last parameter. To do this, enum sta2x11_mfd_plat_dev has been declared in sta2x11-mfd.h and more device types have been added to it. Reviewed-by: Mark Brown Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- drivers/mfd/sta2x11-mfd.c | 350 +++++++++++++++++++++++++++------------- include/linux/mfd/sta2x11-mfd.h | 156 +++++++++++++++++- 2 files changed, 395 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index d35da6820bea..9e01b84aa8bc 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -40,8 +40,7 @@ struct sta2x11_mfd { struct sta2x11_instance *instance; spinlock_t lock; struct list_head list; - void __iomem *sctl_regs; - void __iomem *apbreg_regs; + void __iomem *regs[sta2x11_n_mfd_plat_devs]; }; static LIST_HEAD(sta2x11_mfd_list); @@ -100,56 +99,33 @@ static int __devexit mfd_remove(struct pci_dev *pdev) return 0; } -/* These two functions are exported and are not expected to fail */ -u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) +/* This function is exported and is not expected to fail */ +u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val, + enum sta2x11_mfd_plat_dev index) { struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev); u32 r; unsigned long flags; + void __iomem *regs = mfd->regs[index]; if (!mfd) { dev_warn(&pdev->dev, ": can't access sctl regs\n"); return 0; } - if (!mfd->sctl_regs) { + if (!regs) { dev_warn(&pdev->dev, ": system ctl not initialized\n"); return 0; } spin_lock_irqsave(&mfd->lock, flags); - r = readl(mfd->sctl_regs + reg); + r = readl(regs + reg); r &= ~mask; r |= val; if (mask) - writel(r, mfd->sctl_regs + reg); + writel(r, regs + reg); spin_unlock_irqrestore(&mfd->lock, flags); return r; } -EXPORT_SYMBOL(sta2x11_sctl_mask); - -u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev); - u32 r; - unsigned long flags; - - if (!mfd) { - dev_warn(&pdev->dev, ": can't access apb regs\n"); - return 0; - } - if (!mfd->apbreg_regs) { - dev_warn(&pdev->dev, ": apb bridge not initialized\n"); - return 0; - } - spin_lock_irqsave(&mfd->lock, flags); - r = readl(mfd->apbreg_regs + reg); - r &= ~mask; - r |= val; - if (mask) - writel(r, mfd->apbreg_regs + reg); - spin_unlock_irqrestore(&mfd->lock, flags); - return r; -} -EXPORT_SYMBOL(sta2x11_apbreg_mask); +EXPORT_SYMBOL(__sta2x11_mfd_mask); /* Two debugfs files, for our registers (FIXME: one instance only) */ #define REG(regname) {.name = #regname, .offset = SCTL_ ## regname} @@ -180,51 +156,103 @@ static struct debugfs_regset32 apbreg_regset = { .nregs = ARRAY_SIZE(sta2x11_apbreg_regs), }; -static struct dentry *sta2x11_sctl_debugfs; -static struct dentry *sta2x11_apbreg_debugfs; +#define REG(regname) {.name = #regname, .offset = regname} +static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = { + REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG), + REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG), + REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG), + REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG), + REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG), + REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG), + REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG), + REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG), + REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG), + REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4), + REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2), + REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4), + REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG), + REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG), + REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG), + REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG), + REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG), + REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG), + REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG), + REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG), + REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG), + REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG), + REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG), + REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG), + REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG), + REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG), + REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG), + REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG), + REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG), + REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG), + REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG), + REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG), + REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG), + REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG), + REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG), + REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG), + REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG), + REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG), + REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG), + REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG), + REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG), + REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG), + REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG), + REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG), + REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0), + REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG), + REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG), + REG(PCIE_SoC_INT_ROUTER_STATUS0_REG), + REG(PCIE_SoC_INT_ROUTER_STATUS1_REG), + REG(PCIE_SoC_INT_ROUTER_STATUS2_REG), + REG(PCIE_SoC_INT_ROUTER_STATUS3_REG), + REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG), + REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG), + REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2), + REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1), + REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG), + REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED), + REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG), + REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2), + REG(COMPENSATION_REG3), REG(TEST_CTL_REG), +}; +#undef REG -/* Probe for the two platform devices */ -static int sta2x11_sctl_probe(struct platform_device *dev) -{ - struct pci_dev **pdev; - struct sta2x11_mfd *mfd; - struct resource *res; +static struct debugfs_regset32 apb_soc_regs_regset = { + .regs = sta2x11_apb_soc_regs_regs, + .nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs), +}; - pdev = dev->dev.platform_data; - mfd = sta2x11_mfd_find(*pdev); - if (!mfd) - return -ENODEV; - res = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!res) - return -ENOMEM; +static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs]; - if (!request_mem_region(res->start, resource_size(res), - "sta2x11-sctl")) - return -EBUSY; +static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = { + [sta2x11_sctl] = &sctl_regset, + [sta2x11_apbreg] = &apbreg_regset, + [sta2x11_apb_soc_regs] = &apb_soc_regs_regset, +}; - mfd->sctl_regs = ioremap(res->start, resource_size(res)); - if (!mfd->sctl_regs) { - release_mem_region(res->start, resource_size(res)); - return -ENOMEM; - } - sctl_regset.base = mfd->sctl_regs; - sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl", - S_IFREG | S_IRUGO, - NULL, &sctl_regset); - return 0; -} +static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = { + [sta2x11_sctl] = "sta2x11-sctl", + [sta2x11_apbreg] = "sta2x11-apbreg", + [sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs", +}; -static int sta2x11_apbreg_probe(struct platform_device *dev) +/* Probe for the three platform devices */ + +static int sta2x11_mfd_platform_probe(struct platform_device *dev, + enum sta2x11_mfd_plat_dev index) { struct pci_dev **pdev; struct sta2x11_mfd *mfd; struct resource *res; + const char *name = sta2x11_mfd_names[index]; + struct debugfs_regset32 *regset = sta2x11_mfd_regset[index]; pdev = dev->dev.platform_data; - dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev); - dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev); - mfd = sta2x11_mfd_find(*pdev); if (!mfd) return -ENODEV; @@ -233,25 +261,37 @@ static int sta2x11_apbreg_probe(struct platform_device *dev) if (!res) return -ENOMEM; - if (!request_mem_region(res->start, resource_size(res), - "sta2x11-apbreg")) + if (!request_mem_region(res->start, resource_size(res), name)) return -EBUSY; - mfd->apbreg_regs = ioremap(res->start, resource_size(res)); - if (!mfd->apbreg_regs) { + mfd->regs[index] = ioremap(res->start, resource_size(res)); + if (!mfd->regs[index]) { release_mem_region(res->start, resource_size(res)); return -ENOMEM; } - dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs); - - apbreg_regset.base = mfd->apbreg_regs; - sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg", - S_IFREG | S_IRUGO, - NULL, &apbreg_regset); + regset->base = mfd->regs[index]; + sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name, + S_IFREG | S_IRUGO, + NULL, regset); return 0; } -/* The two platform drivers */ +static int sta2x11_sctl_probe(struct platform_device *dev) +{ + return sta2x11_mfd_platform_probe(dev, sta2x11_sctl); +} + +static int sta2x11_apbreg_probe(struct platform_device *dev) +{ + return sta2x11_mfd_platform_probe(dev, sta2x11_apbreg); +} + +static int sta2x11_apb_soc_regs_probe(struct platform_device *dev) +{ + return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs); +} + +/* The three platform drivers */ static struct platform_driver sta2x11_sctl_platform_driver = { .driver = { .name = "sta2x11-sctl", @@ -280,13 +320,29 @@ static int __init sta2x11_apbreg_init(void) return platform_driver_register(&sta2x11_platform_driver); } +static struct platform_driver sta2x11_apb_soc_regs_platform_driver = { + .driver = { + .name = "sta2x11-apb-soc-regs", + .owner = THIS_MODULE, + }, + .probe = sta2x11_apb_soc_regs_probe, +}; + +static int __init sta2x11_apb_soc_regs_init(void) +{ + pr_info("%s\n", __func__); + return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver); +} + /* - * What follows is the PCI device that hosts the above two pdevs. + * What follows are the PCI devices that host the above pdevs. * Each logic block is 4kB and they are all consecutive: we use this info. */ -/* Bar 0 */ -enum bar0_cells { +/* Mfd 0 device */ + +/* Mfd 0, Bar 0 */ +enum mfd0_bar0_cells { STA2X11_GPIO_0 = 0, STA2X11_GPIO_1, STA2X11_GPIO_2, @@ -295,8 +351,8 @@ enum bar0_cells { STA2X11_SCR, STA2X11_TIME, }; -/* Bar 1 */ -enum bar1_cells { +/* Mfd 0 , Bar 1 */ +enum mfd0_bar1_cells { STA2X11_APBREG = 0, }; #define CELL_4K(_name, _cell) { \ @@ -330,17 +386,46 @@ static const __devinitconst struct resource apbreg_resources[] = { #define DEV(_name, _r) \ { .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, } -static __devinitdata struct mfd_cell sta2x11_mfd_bar0[] = { +static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = { DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */ DEV("sta2x11-sctl", sctl_resources), DEV("sta2x11-scr", scr_resources), DEV("sta2x11-time", time_resources), }; -static __devinitdata struct mfd_cell sta2x11_mfd_bar1[] = { +static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = { DEV("sta2x11-apbreg", apbreg_resources), }; +/* Mfd 1 devices */ + +/* Mfd 1, Bar 0 */ +enum mfd1_bar0_cells { + STA2X11_VIC = 0, +}; + +/* Mfd 1, Bar 1 */ +enum mfd1_bar1_cells { + STA2X11_APB_SOC_REGS = 0, +}; + +static const __devinitconst struct resource vic_resources[] = { + CELL_4K("sta2x11-vic", STA2X11_VIC), +}; + +static const __devinitconst struct resource apb_soc_regs_resources[] = { + CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS), +}; + +static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = { + DEV("sta2x11-vic", vic_resources), +}; + +static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = { + DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources), +}; + + static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state) { pci_save_state(pdev); @@ -363,10 +448,63 @@ static int sta2x11_mfd_resume(struct pci_dev *pdev) return 0; } +struct sta2x11_mfd_bar_setup_data { + struct mfd_cell *cells; + int ncells; +}; + +struct sta2x11_mfd_setup_data { + struct sta2x11_mfd_bar_setup_data bars[2]; +}; + +#define STA2X11_MFD0 0 +#define STA2X11_MFD1 1 + +static struct sta2x11_mfd_setup_data mfd_setup_data[] = { + /* Mfd 0: gpio, sctl, scr, timers / apbregs */ + [STA2X11_MFD0] = { + .bars = { + [0] = { + .cells = sta2x11_mfd0_bar0, + .ncells = ARRAY_SIZE(sta2x11_mfd0_bar0), + }, + [1] = { + .cells = sta2x11_mfd0_bar1, + .ncells = ARRAY_SIZE(sta2x11_mfd0_bar1), + }, + }, + }, + /* Mfd 1: vic / apb-soc-regs */ + [STA2X11_MFD1] = { + .bars = { + [0] = { + .cells = sta2x11_mfd1_bar0, + .ncells = ARRAY_SIZE(sta2x11_mfd1_bar0), + }, + [1] = { + .cells = sta2x11_mfd1_bar1, + .ncells = ARRAY_SIZE(sta2x11_mfd1_bar1), + }, + }, + }, +}; + +static void __devinit sta2x11_mfd_setup(struct pci_dev *pdev, + struct sta2x11_mfd_setup_data *sd) +{ + int i, j; + for (i = 0; i < ARRAY_SIZE(sd->bars); i++) + for (j = 0; j < sd->bars[i].ncells; j++) { + sd->bars[i].cells[j].pdata_size = sizeof(pdev); + sd->bars[i].cells[j].platform_data = &pdev; + } +} + static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) { int err, i; + struct sta2x11_mfd_setup_data *setup_data; struct sta2x11_gpio_pdata *gpio_data; dev_info(&pdev->dev, "%s\n", __func__); @@ -381,6 +519,10 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev, if (err) dev_info(&pdev->dev, "Enable msi failed\n"); + setup_data = pci_id->device == PCI_DEVICE_ID_STMICRO_GPIO ? + &mfd_setup_data[STA2X11_MFD0] : + &mfd_setup_data[STA2X11_MFD1]; + /* Read gpio config data as pci device's platform data */ gpio_data = dev_get_platdata(&pdev->dev); if (!gpio_data) @@ -392,35 +534,23 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev, pdev, &pdev); /* platform data is the pci device for all of them */ - for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) { - sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev); - sta2x11_mfd_bar0[i].platform_data = &pdev; - } - sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev); - sta2x11_mfd_bar1[0].platform_data = &pdev; + sta2x11_mfd_setup(pdev, setup_data); /* Record this pdev before mfd_add_devices: their probe looks for it */ sta2x11_mfd_add(pdev, GFP_ATOMIC); - - err = mfd_add_devices(&pdev->dev, -1, - sta2x11_mfd_bar0, - ARRAY_SIZE(sta2x11_mfd_bar0), - &pdev->resource[0], - 0, NULL); - if (err) { - dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err); - goto err_disable; - } - - err = mfd_add_devices(&pdev->dev, -1, - sta2x11_mfd_bar1, - ARRAY_SIZE(sta2x11_mfd_bar1), - &pdev->resource[1], - 0, NULL); - if (err) { - dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err); - goto err_disable; + /* Just 2 bars for all mfd's at present */ + for (i = 0; i < 2; i++) { + err = mfd_add_devices(&pdev->dev, -1, + setup_data->bars[i].cells, + setup_data->bars[i].ncells, + &pdev->resource[i], + 0, NULL); + if (err) { + dev_err(&pdev->dev, + "mfd_add_devices[%d] failed: %d\n", i, err); + goto err_disable; + } } return 0; @@ -434,6 +564,7 @@ err_disable: static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)}, + {PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_VIC)}, {0,}, }; @@ -459,6 +590,7 @@ static int __init sta2x11_mfd_init(void) */ subsys_initcall(sta2x11_apbreg_init); subsys_initcall(sta2x11_sctl_init); +subsys_initcall(sta2x11_apb_soc_regs_init); rootfs_initcall(sta2x11_mfd_init); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index d179227e866f..4d858797d7e2 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -26,6 +26,20 @@ #include #include +enum sta2x11_mfd_plat_dev { + sta2x11_sctl = 0, + sta2x11_gpio, + sta2x11_scr, + sta2x11_time, + sta2x11_apbreg, + sta2x11_apb_soc_regs, + sta2x11_vic, + sta2x11_n_mfd_plat_devs, +}; + +extern u32 +__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev); + /* * The MFD PCI block includes the GPIO peripherals and other register blocks. * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".) @@ -182,7 +196,11 @@ struct sta2x11_gpio_pdata { * The APB bridge has its own registers, needed by our users as well. * They are accessed with the following read/mask/write function. */ -u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val); +static inline u32 +sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) +{ + return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg); +} /* CAN and MLB */ #define APBREG_BSR 0x00 /* Bridge Status Reg */ @@ -211,7 +229,11 @@ u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val); * The system controller has its own registers. Some of these are accessed * by out users as well, using the following read/mask/write/function */ -u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val); +static inline +u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) +{ + return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl); +} #define SCTL_SCCTL 0x00 /* System controller control register */ #define SCTL_ARMCFG 0x04 /* ARM configuration register */ @@ -321,4 +343,134 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val); #define SCTL_SCPEREN1_I2C3 (1 << 16) #define SCTL_SCPEREN1_USB_PHY (1 << 17) +/* + * APB-SOC registers + */ +static inline +u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) +{ + return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apb_soc_regs); +} + +#define PCIE_EP1_FUNC3_0_INTR_REG 0x000 +#define PCIE_EP1_FUNC7_4_INTR_REG 0x004 +#define PCIE_EP2_FUNC3_0_INTR_REG 0x008 +#define PCIE_EP2_FUNC7_4_INTR_REG 0x00c +#define PCIE_EP3_FUNC3_0_INTR_REG 0x010 +#define PCIE_EP3_FUNC7_4_INTR_REG 0x014 +#define PCIE_EP4_FUNC3_0_INTR_REG 0x018 +#define PCIE_EP4_FUNC7_4_INTR_REG 0x01c +#define PCIE_INTR_ENABLE0_REG 0x020 +#define PCIE_INTR_ENABLE1_REG 0x024 +#define PCIE_EP1_FUNC_TC_REG 0x028 +#define PCIE_EP2_FUNC_TC_REG 0x02c +#define PCIE_EP3_FUNC_TC_REG 0x030 +#define PCIE_EP4_FUNC_TC_REG 0x034 +#define PCIE_EP1_FUNC_F_REG 0x038 +#define PCIE_EP2_FUNC_F_REG 0x03c +#define PCIE_EP3_FUNC_F_REG 0x040 +#define PCIE_EP4_FUNC_F_REG 0x044 +#define PCIE_PAB_AMBA_SW_RST_REG 0x048 +#define PCIE_PM_STATUS_0_PORT_0_4 0x04c +#define PCIE_PM_STATUS_7_0_EP1 0x050 +#define PCIE_PM_STATUS_7_0_EP2 0x054 +#define PCIE_PM_STATUS_7_0_EP3 0x058 +#define PCIE_PM_STATUS_7_0_EP4 0x05c +#define PCIE_DEV_ID_0_EP1_REG 0x060 +#define PCIE_CC_REV_ID_0_EP1_REG 0x064 +#define PCIE_DEV_ID_1_EP1_REG 0x068 +#define PCIE_CC_REV_ID_1_EP1_REG 0x06c +#define PCIE_DEV_ID_2_EP1_REG 0x070 +#define PCIE_CC_REV_ID_2_EP1_REG 0x074 +#define PCIE_DEV_ID_3_EP1_REG 0x078 +#define PCIE_CC_REV_ID_3_EP1_REG 0x07c +#define PCIE_DEV_ID_4_EP1_REG 0x080 +#define PCIE_CC_REV_ID_4_EP1_REG 0x084 +#define PCIE_DEV_ID_5_EP1_REG 0x088 +#define PCIE_CC_REV_ID_5_EP1_REG 0x08c +#define PCIE_DEV_ID_6_EP1_REG 0x090 +#define PCIE_CC_REV_ID_6_EP1_REG 0x094 +#define PCIE_DEV_ID_7_EP1_REG 0x098 +#define PCIE_CC_REV_ID_7_EP1_REG 0x09c +#define PCIE_DEV_ID_0_EP2_REG 0x0a0 +#define PCIE_CC_REV_ID_0_EP2_REG 0x0a4 +#define PCIE_DEV_ID_1_EP2_REG 0x0a8 +#define PCIE_CC_REV_ID_1_EP2_REG 0x0ac +#define PCIE_DEV_ID_2_EP2_REG 0x0b0 +#define PCIE_CC_REV_ID_2_EP2_REG 0x0b4 +#define PCIE_DEV_ID_3_EP2_REG 0x0b8 +#define PCIE_CC_REV_ID_3_EP2_REG 0x0bc +#define PCIE_DEV_ID_4_EP2_REG 0x0c0 +#define PCIE_CC_REV_ID_4_EP2_REG 0x0c4 +#define PCIE_DEV_ID_5_EP2_REG 0x0c8 +#define PCIE_CC_REV_ID_5_EP2_REG 0x0cc +#define PCIE_DEV_ID_6_EP2_REG 0x0d0 +#define PCIE_CC_REV_ID_6_EP2_REG 0x0d4 +#define PCIE_DEV_ID_7_EP2_REG 0x0d8 +#define PCIE_CC_REV_ID_7_EP2_REG 0x0dC +#define PCIE_DEV_ID_0_EP3_REG 0x0e0 +#define PCIE_CC_REV_ID_0_EP3_REG 0x0e4 +#define PCIE_DEV_ID_1_EP3_REG 0x0e8 +#define PCIE_CC_REV_ID_1_EP3_REG 0x0ec +#define PCIE_DEV_ID_2_EP3_REG 0x0f0 +#define PCIE_CC_REV_ID_2_EP3_REG 0x0f4 +#define PCIE_DEV_ID_3_EP3_REG 0x0f8 +#define PCIE_CC_REV_ID_3_EP3_REG 0x0fc +#define PCIE_DEV_ID_4_EP3_REG 0x100 +#define PCIE_CC_REV_ID_4_EP3_REG 0x104 +#define PCIE_DEV_ID_5_EP3_REG 0x108 +#define PCIE_CC_REV_ID_5_EP3_REG 0x10c +#define PCIE_DEV_ID_6_EP3_REG 0x110 +#define PCIE_CC_REV_ID_6_EP3_REG 0x114 +#define PCIE_DEV_ID_7_EP3_REG 0x118 +#define PCIE_CC_REV_ID_7_EP3_REG 0x11c +#define PCIE_DEV_ID_0_EP4_REG 0x120 +#define PCIE_CC_REV_ID_0_EP4_REG 0x124 +#define PCIE_DEV_ID_1_EP4_REG 0x128 +#define PCIE_CC_REV_ID_1_EP4_REG 0x12c +#define PCIE_DEV_ID_2_EP4_REG 0x130 +#define PCIE_CC_REV_ID_2_EP4_REG 0x134 +#define PCIE_DEV_ID_3_EP4_REG 0x138 +#define PCIE_CC_REV_ID_3_EP4_REG 0x13c +#define PCIE_DEV_ID_4_EP4_REG 0x140 +#define PCIE_CC_REV_ID_4_EP4_REG 0x144 +#define PCIE_DEV_ID_5_EP4_REG 0x148 +#define PCIE_CC_REV_ID_5_EP4_REG 0x14c +#define PCIE_DEV_ID_6_EP4_REG 0x150 +#define PCIE_CC_REV_ID_6_EP4_REG 0x154 +#define PCIE_DEV_ID_7_EP4_REG 0x158 +#define PCIE_CC_REV_ID_7_EP4_REG 0x15c +#define PCIE_SUBSYS_VEN_ID_REG 0x160 +#define PCIE_COMMON_CLOCK_CONFIG_0_4_0 0x164 +#define PCIE_MIPHYP_SSC_EN_REG 0x168 +#define PCIE_MIPHYP_ADDR_REG 0x16c +#define PCIE_L1_ASPM_READY_REG 0x170 +#define PCIE_EXT_CFG_RDY_REG 0x174 +#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178 +#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c +#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180 +#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184 +#define DMA_IP_CTRL_REG 0x324 +#define DISP_BRIDGE_PU_PD_CTRL_REG 0x328 +#define VIP_PU_PD_CTRL_REG 0x32c +#define USB_MLB_PU_PD_CTRL_REG 0x330 +#define SDIO_PU_PD_MISCFUNC_CTRL_REG1 0x334 +#define SDIO_PU_PD_MISCFUNC_CTRL_REG2 0x338 +#define UART_PU_PD_CTRL_REG 0x33c +#define ARM_Lock 0x340 +#define SYS_IO_CHAR_REG1 0x344 +#define SYS_IO_CHAR_REG2 0x348 +#define SATA_CORE_ID_REG 0x34c +#define SATA_CTRL_REG 0x350 +#define I2C_HSFIX_MISC_REG 0x354 +#define SPARE2_RESERVED 0x358 +#define SPARE3_RESERVED 0x35c +#define MASTER_LOCK_REG 0x368 +#define SYSTEM_CONFIG_STATUS_REG 0x36c +#define MSP_CLK_CTRL_REG 0x39c +#define COMPENSATION_REG1 0x3c4 +#define COMPENSATION_REG2 0x3c8 +#define COMPENSATION_REG3 0x3cc +#define TEST_CTL_REG 0x3d0 + #endif /* __STA2X11_MFD_H */ -- cgit v1.2.3 From d94e25535a7979a6c81922496f475a5dd0e006b4 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:19:53 +0100 Subject: mfd: sta2x11-mfd: Add regmap support Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 1 + drivers/mfd/sta2x11-mfd.c | 234 ++++++++++++++++++++++------------------ include/linux/mfd/sta2x11-mfd.h | 1 + 3 files changed, 131 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index b280639a7634..59359a7a2493 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1046,6 +1046,7 @@ config MFD_STA2X11 bool "STA2X11 multi function device support" depends on STA2X11 select MFD_CORE + select REGMAP_MMIO config MFD_SYSCON bool "System Controller Register R/W Based on Regmap" diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index 9e01b84aa8bc..0cac2013bbf6 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -27,17 +27,25 @@ #include #include #include -#include #include #include #include #include +#include #include +static inline int __reg_within_range(unsigned int r, + unsigned int start, + unsigned int end) +{ + return ((r >= start) && (r <= end)); +} + /* This describes STA2X11 MFD chip for us, we may have several */ struct sta2x11_mfd { struct sta2x11_instance *instance; + struct regmap *regmap[sta2x11_n_mfd_plat_devs]; spinlock_t lock; struct list_head list; void __iomem *regs[sta2x11_n_mfd_plat_devs]; @@ -127,118 +135,126 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val, } EXPORT_SYMBOL(__sta2x11_mfd_mask); -/* Two debugfs files, for our registers (FIXME: one instance only) */ -#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname} -static struct debugfs_reg32 sta2x11_sctl_regs[] = { - REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL), - REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0), - REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1), - REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3), - REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1), - REG(SCCLKSTAT2), REG(SCRSTSTA), -}; -#undef REG +/* + * Special sta2x11-mfd regmap lock/unlock functions + */ -static struct debugfs_regset32 sctl_regset = { - .regs = sta2x11_sctl_regs, - .nregs = ARRAY_SIZE(sta2x11_sctl_regs), -}; +static void sta2x11_regmap_lock(void *__lock) +{ + spinlock_t *lock = __lock; + spin_lock(lock); +} -#define REG(regname) {.name = #regname, .offset = regname} -static struct debugfs_reg32 sta2x11_apbreg_regs[] = { - REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC), - REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG), -}; -#undef REG +static void sta2x11_regmap_unlock(void *__lock) +{ + spinlock_t *lock = __lock; + spin_unlock(lock); +} -static struct debugfs_regset32 apbreg_regset = { - .regs = sta2x11_apbreg_regs, - .nregs = ARRAY_SIZE(sta2x11_apbreg_regs), +static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = { + [sta2x11_sctl] = "sta2x11-sctl", + [sta2x11_apbreg] = "sta2x11-apbreg", + [sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs", }; -#define REG(regname) {.name = #regname, .offset = regname} -static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = { - REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG), - REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG), - REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG), - REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG), - REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG), - REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG), - REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG), - REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG), - REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG), - REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4), - REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2), - REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4), - REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG), - REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG), - REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG), - REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG), - REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG), - REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG), - REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG), - REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG), - REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG), - REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG), - REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG), - REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG), - REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG), - REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG), - REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG), - REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG), - REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG), - REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG), - REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG), - REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG), - REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG), - REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG), - REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG), - REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG), - REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG), - REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG), - REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG), - REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG), - REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG), - REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG), - REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG), - REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG), - REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0), - REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG), - REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG), - REG(PCIE_SoC_INT_ROUTER_STATUS0_REG), - REG(PCIE_SoC_INT_ROUTER_STATUS1_REG), - REG(PCIE_SoC_INT_ROUTER_STATUS2_REG), - REG(PCIE_SoC_INT_ROUTER_STATUS3_REG), - REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG), - REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG), - REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2), - REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1), - REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG), - REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED), - REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG), - REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2), - REG(COMPENSATION_REG3), REG(TEST_CTL_REG), +static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg) +{ + return !__reg_within_range(reg, SCTL_SCPCIECSBRST, SCTL_SCRSTSTA); +} + +static struct regmap_config sta2x11_sctl_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .lock = sta2x11_regmap_lock, + .unlock = sta2x11_regmap_unlock, + .max_register = SCTL_SCRSTSTA, + .writeable_reg = sta2x11_sctl_writeable_reg, }; -#undef REG -static struct debugfs_regset32 apb_soc_regs_regset = { - .regs = sta2x11_apb_soc_regs_regs, - .nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs), +static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg) +{ + /* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */ + if (reg >= APBREG_BSR_SARAC) + reg -= APBREG_BSR_SARAC; + switch (reg) { + case APBREG_BSR: + case APBREG_PAER: + case APBREG_PWAC: + case APBREG_PRAC: + case APBREG_PCG: + case APBREG_PUR: + case APBREG_EMU_PCG: + return true; + default: + return false; + } +} + +static bool sta2x11_apbreg_writeable_reg(struct device *dev, unsigned int reg) +{ + if (reg >= APBREG_BSR_SARAC) + reg -= APBREG_BSR_SARAC; + if (!sta2x11_apbreg_readable_reg(dev, reg)) + return false; + return reg != APBREG_PAER; +} + +static struct regmap_config sta2x11_apbreg_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .lock = sta2x11_regmap_lock, + .unlock = sta2x11_regmap_unlock, + .max_register = APBREG_EMU_PCG_SARAC, + .readable_reg = sta2x11_apbreg_readable_reg, + .writeable_reg = sta2x11_apbreg_writeable_reg, }; +static bool sta2x11_apb_soc_regs_readable_reg(struct device *dev, + unsigned int reg) +{ + return reg <= PCIE_SoC_INT_ROUTER_STATUS3_REG || + __reg_within_range(reg, DMA_IP_CTRL_REG, SPARE3_RESERVED) || + __reg_within_range(reg, MASTER_LOCK_REG, + SYSTEM_CONFIG_STATUS_REG) || + reg == MSP_CLK_CTRL_REG || + __reg_within_range(reg, COMPENSATION_REG1, TEST_CTL_REG); +} -static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs]; +static bool sta2x11_apb_soc_regs_writeable_reg(struct device *dev, + unsigned int reg) +{ + if (!sta2x11_apb_soc_regs_readable_reg(dev, reg)) + return false; + switch (reg) { + case PCIE_COMMON_CLOCK_CONFIG_0_4_0: + case SYSTEM_CONFIG_STATUS_REG: + case COMPENSATION_REG1: + case PCIE_SoC_INT_ROUTER_STATUS0_REG...PCIE_SoC_INT_ROUTER_STATUS3_REG: + case PCIE_PM_STATUS_0_PORT_0_4...PCIE_PM_STATUS_7_0_EP4: + return false; + default: + return true; + } +} -static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = { - [sta2x11_sctl] = &sctl_regset, - [sta2x11_apbreg] = &apbreg_regset, - [sta2x11_apb_soc_regs] = &apb_soc_regs_regset, +static struct regmap_config sta2x11_apb_soc_regs_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .lock = sta2x11_regmap_lock, + .unlock = sta2x11_regmap_unlock, + .max_register = TEST_CTL_REG, + .readable_reg = sta2x11_apb_soc_regs_readable_reg, + .writeable_reg = sta2x11_apb_soc_regs_writeable_reg, }; -static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = { - [sta2x11_sctl] = "sta2x11-sctl", - [sta2x11_apbreg] = "sta2x11-apbreg", - [sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs", +static struct regmap_config * +sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = { + [sta2x11_sctl] = &sta2x11_sctl_regmap_config, + [sta2x11_apbreg] = &sta2x11_apbreg_regmap_config, + [sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config, }; /* Probe for the three platform devices */ @@ -250,12 +266,14 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev, struct sta2x11_mfd *mfd; struct resource *res; const char *name = sta2x11_mfd_names[index]; - struct debugfs_regset32 *regset = sta2x11_mfd_regset[index]; + struct regmap_config *regmap_config = sta2x11_mfd_regmap_configs[index]; pdev = dev->dev.platform_data; mfd = sta2x11_mfd_find(*pdev); if (!mfd) return -ENODEV; + if (!regmap_config) + return -ENODEV; res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (!res) @@ -269,10 +287,16 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev, release_mem_region(res->start, resource_size(res)); return -ENOMEM; } - regset->base = mfd->regs[index]; - sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name, - S_IFREG | S_IRUGO, - NULL, regset); + regmap_config->lock_arg = &mfd->lock; + /* + No caching, registers could be reached both via regmap and via + void __iomem * + */ + regmap_config->cache_type = REGCACHE_NONE; + mfd->regmap[index] = devm_regmap_init_mmio(&dev->dev, mfd->regs[index], + regmap_config); + WARN_ON(!mfd->regmap[index]); + return 0; } diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index 4d858797d7e2..83344304f2cf 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -246,6 +246,7 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) #define SCTL_SCPEREN1 0x20 /* Peripheral clock enable register 1 */ #define SCTL_SCPEREN2 0x24 /* Peripheral clock enable register 2 */ #define SCTL_SCGRST 0x28 /* Peripheral global reset */ +#define SCTL_SCPCIECSBRST 0x2c /* PCIe PAB CSB reset status register */ #define SCTL_SCPCIPMCR1 0x30 /* PCI power management control 1 */ #define SCTL_SCPCIPMCR2 0x34 /* PCI power management control 2 */ #define SCTL_SCPCIPMSR1 0x38 /* PCI power management status 1 */ -- cgit v1.2.3 From 29f5b5a326b44c55e81b15308255ba695fecb323 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:19:54 +0100 Subject: mfd: sta2x11-mfd: Add sta2x11_mfd_get_regs_data() function A couple of predefined clocks (mux and gated) need to be initialized with the virtual address of the clock's controlling register and the address of a spinlock used to protect against races. This function exports such data for all the mfd cells. Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- drivers/mfd/sta2x11-mfd.c | 22 ++++++++++++++++++++++ include/linux/mfd/sta2x11-mfd.h | 5 +++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index 0cac2013bbf6..6d12ab42aba3 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -135,6 +135,28 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val, } EXPORT_SYMBOL(__sta2x11_mfd_mask); +int sta2x11_mfd_get_regs_data(struct platform_device *dev, + enum sta2x11_mfd_plat_dev index, + void __iomem **regs, + spinlock_t **lock) +{ + struct pci_dev *pdev = *(struct pci_dev **)(dev->dev.platform_data); + struct sta2x11_mfd *mfd; + + if (!pdev) + return -ENODEV; + mfd = sta2x11_mfd_find(pdev); + if (!mfd) + return -ENODEV; + if (index >= sta2x11_n_mfd_plat_devs) + return -ENODEV; + *regs = mfd->regs[index]; + *lock = &mfd->lock[index]; + pr_debug("%s %d *regs = %p\n", __func__, __LINE__, *regs); + return *regs ? 0 : -ENODEV; +} +EXPORT_SYMBOL(sta2x11_mfd_get_regs_data); + /* * Special sta2x11-mfd regmap lock/unlock functions */ diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index 83344304f2cf..e813e5efbe4e 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -474,4 +474,9 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) #define COMPENSATION_REG3 0x3cc #define TEST_CTL_REG 0x3d0 +extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev, + enum sta2x11_mfd_plat_dev index, + void __iomem **regs, + spinlock_t **lock); + #endif /* __STA2X11_MFD_H */ -- cgit v1.2.3 From b18adafccd497245a6bc5b867bf9cba7e01f8729 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:19:55 +0100 Subject: mfd: sta2x11-mfd: Use defines for platform devices' names Since there are now many sta2x11-mfd platform devices, using defines for their names looks like a better solution. Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- drivers/mfd/sta2x11-mfd.c | 42 +++++++++++++++++++++-------------------- include/linux/mfd/sta2x11-mfd.h | 8 ++++++++ 2 files changed, 30 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index 6d12ab42aba3..8d38ef264722 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -174,9 +174,9 @@ static void sta2x11_regmap_unlock(void *__lock) } static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = { - [sta2x11_sctl] = "sta2x11-sctl", - [sta2x11_apbreg] = "sta2x11-apbreg", - [sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs", + [sta2x11_sctl] = STA2X11_MFD_SCTL_NAME, + [sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME, + [sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME, }; static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg) @@ -340,7 +340,7 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev) /* The three platform drivers */ static struct platform_driver sta2x11_sctl_platform_driver = { .driver = { - .name = "sta2x11-sctl", + .name = STA2X11_MFD_SCTL_NAME, .owner = THIS_MODULE, }, .probe = sta2x11_sctl_probe, @@ -354,7 +354,7 @@ static int __init sta2x11_sctl_init(void) static struct platform_driver sta2x11_platform_driver = { .driver = { - .name = "sta2x11-apbreg", + .name = STA2X11_MFD_APBREG_NAME, .owner = THIS_MODULE, }, .probe = sta2x11_apbreg_probe, @@ -368,7 +368,7 @@ static int __init sta2x11_apbreg_init(void) static struct platform_driver sta2x11_apb_soc_regs_platform_driver = { .driver = { - .name = "sta2x11-apb-soc-regs", + .name = STA2X11_MFD_APB_SOC_REGS_NAME, .owner = THIS_MODULE, }, .probe = sta2x11_apb_soc_regs_probe, @@ -409,38 +409,40 @@ enum mfd0_bar1_cells { static const __devinitconst struct resource gpio_resources[] = { { - .name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */ + /* 4 consecutive cells, 1 driver */ + .name = STA2X11_MFD_GPIO_NAME, .start = 0, .end = (4 * 4096) - 1, .flags = IORESOURCE_MEM, } }; static const __devinitconst struct resource sctl_resources[] = { - CELL_4K("sta2x11-sctl", STA2X11_SCTL), + CELL_4K(STA2X11_MFD_SCTL_NAME, STA2X11_SCTL), }; static const __devinitconst struct resource scr_resources[] = { - CELL_4K("sta2x11-scr", STA2X11_SCR), + CELL_4K(STA2X11_MFD_SCR_NAME, STA2X11_SCR), }; static const __devinitconst struct resource time_resources[] = { - CELL_4K("sta2x11-time", STA2X11_TIME), + CELL_4K(STA2X11_MFD_TIME_NAME, STA2X11_TIME), }; static const __devinitconst struct resource apbreg_resources[] = { - CELL_4K("sta2x11-apbreg", STA2X11_APBREG), + CELL_4K(STA2X11_MFD_APBREG_NAME, STA2X11_APBREG), }; #define DEV(_name, _r) \ { .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, } static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = { - DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */ - DEV("sta2x11-sctl", sctl_resources), - DEV("sta2x11-scr", scr_resources), - DEV("sta2x11-time", time_resources), + /* offset 0: we add pdata later */ + DEV(STA2X11_MFD_GPIO_NAME, gpio_resources), + DEV(STA2X11_MFD_SCTL_NAME, sctl_resources), + DEV(STA2X11_MFD_SCR_NAME, scr_resources), + DEV(STA2X11_MFD_TIME_NAME, time_resources), }; static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = { - DEV("sta2x11-apbreg", apbreg_resources), + DEV(STA2X11_MFD_APBREG_NAME, apbreg_resources), }; /* Mfd 1 devices */ @@ -456,19 +458,19 @@ enum mfd1_bar1_cells { }; static const __devinitconst struct resource vic_resources[] = { - CELL_4K("sta2x11-vic", STA2X11_VIC), + CELL_4K(STA2X11_MFD_VIC_NAME, STA2X11_VIC), }; static const __devinitconst struct resource apb_soc_regs_resources[] = { - CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS), + CELL_4K(STA2X11_MFD_APB_SOC_REGS_NAME, STA2X11_APB_SOC_REGS), }; static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = { - DEV("sta2x11-vic", vic_resources), + DEV(STA2X11_MFD_VIC_NAME, vic_resources), }; static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = { - DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources), + DEV(STA2X11_MFD_APB_SOC_REGS_NAME, apb_soc_regs_resources), }; diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index e813e5efbe4e..058de2bacf76 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -37,6 +37,14 @@ enum sta2x11_mfd_plat_dev { sta2x11_n_mfd_plat_devs, }; +#define STA2X11_MFD_SCTL_NAME "sta2x11-sctl" +#define STA2X11_MFD_GPIO_NAME "sta2x11-gpio" +#define STA2X11_MFD_SCR_NAME "sta2x11-scr" +#define STA2X11_MFD_TIME_NAME "sta2x11-time" +#define STA2X11_MFD_APBREG_NAME "sta2x11-apbreg" +#define STA2X11_MFD_APB_SOC_REGS_NAME "sta2x11-apb-soc-regs" +#define STA2X11_MFD_VIC_NAME "sta2x11-vic" + extern u32 __sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev); -- cgit v1.2.3 From dba6c1aeea4dd0e251e41c3f585abf4a06a4f057 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:19:59 +0100 Subject: mfd: sta2x11-mfd: Add scr (otp registers) platform driver Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- drivers/mfd/sta2x11-mfd.c | 52 ++++++++++++++++++++++++++++++++++++++++- include/linux/mfd/sta2x11-mfd.h | 7 ++++++ 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index da65839f1d86..7365f0f89df2 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -175,10 +175,16 @@ static void sta2x11_regmap_unlock(void *__lock) spin_unlock(lock); } +/* OTP (one time programmable registers do not require locking */ +static void sta2x11_regmap_nolock(void *__lock) +{ +} + static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = { [sta2x11_sctl] = STA2X11_MFD_SCTL_NAME, [sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME, [sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME, + [sta2x11_scr] = STA2X11_MFD_SCR_NAME, }; static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg) @@ -196,6 +202,28 @@ static struct regmap_config sta2x11_sctl_regmap_config = { .writeable_reg = sta2x11_sctl_writeable_reg, }; +static bool sta2x11_scr_readable_reg(struct device *dev, unsigned int reg) +{ + return (reg == STA2X11_SECR_CR) || + __reg_within_range(reg, STA2X11_SECR_FVR0, STA2X11_SECR_FVR1); +} + +static bool sta2x11_scr_writeable_reg(struct device *dev, unsigned int reg) +{ + return false; +} + +static struct regmap_config sta2x11_scr_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .lock = sta2x11_regmap_nolock, + .unlock = sta2x11_regmap_nolock, + .max_register = STA2X11_SECR_FVR1, + .readable_reg = sta2x11_scr_readable_reg, + .writeable_reg = sta2x11_scr_writeable_reg, +}; + static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg) { /* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */ @@ -279,9 +307,10 @@ sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = { [sta2x11_sctl] = &sta2x11_sctl_regmap_config, [sta2x11_apbreg] = &sta2x11_apbreg_regmap_config, [sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config, + [sta2x11_scr] = &sta2x11_scr_regmap_config, }; -/* Probe for the three platform devices */ +/* Probe for the four platform devices */ static int sta2x11_mfd_platform_probe(struct platform_device *dev, enum sta2x11_mfd_plat_dev index) @@ -339,6 +368,11 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev) return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs); } +static int sta2x11_scr_probe(struct platform_device *dev) +{ + return sta2x11_mfd_platform_probe(dev, sta2x11_scr); +} + /* The three platform drivers */ static struct platform_driver sta2x11_sctl_platform_driver = { .driver = { @@ -382,6 +416,21 @@ static int __init sta2x11_apb_soc_regs_init(void) return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver); } +static struct platform_driver sta2x11_scr_platform_driver = { + .driver = { + .name = STA2X11_MFD_SCR_NAME, + .owner = THIS_MODULE, + }, + .probe = sta2x11_scr_probe, +}; + +static int __init sta2x11_scr_init(void) +{ + pr_info("%s\n", __func__); + return platform_driver_register(&sta2x11_scr_platform_driver); +} + + /* * What follows are the PCI devices that host the above pdevs. * Each logic block is 4kB and they are all consecutive: we use this info. @@ -631,6 +680,7 @@ static int __init sta2x11_mfd_init(void) subsys_initcall(sta2x11_apbreg_init); subsys_initcall(sta2x11_sctl_init); subsys_initcall(sta2x11_apb_soc_regs_init); +subsys_initcall(sta2x11_scr_init); rootfs_initcall(sta2x11_mfd_init); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index 058de2bacf76..08cad95758c6 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -482,6 +482,13 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) #define COMPENSATION_REG3 0x3cc #define TEST_CTL_REG 0x3d0 +/* + * SECR (OTP) registers + */ +#define STA2X11_SECR_CR 0x00 +#define STA2X11_SECR_FVR0 0x10 +#define STA2X11_SECR_FVR1 0x14 + extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev, enum sta2x11_mfd_plat_dev index, void __iomem **regs, -- cgit v1.2.3 From 818b5c2528b9e31101bb39018fd211dcf159a696 Mon Sep 17 00:00:00 2001 From: Davide Ciminaghi Date: Fri, 9 Nov 2012 15:20:00 +0100 Subject: mfd: sta2x11-mfd: Add defines for some sta2x11 sctl registers These are required for the clock infrastructure code to properly configure and control the sta2x11 PLLs. Signed-off-by: Davide Ciminaghi Acked-by: Alessandro Rubini Signed-off-by: Samuel Ortiz --- include/linux/mfd/sta2x11-mfd.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h index 08cad95758c6..9a855ac11cbf 100644 --- a/include/linux/mfd/sta2x11-mfd.h +++ b/include/linux/mfd/sta2x11-mfd.h @@ -246,8 +246,29 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) #define SCTL_SCCTL 0x00 /* System controller control register */ #define SCTL_ARMCFG 0x04 /* ARM configuration register */ #define SCTL_SCPLLCTL 0x08 /* PLL control status register */ + +#define SCTL_SCPLLCTL_AUDIO_PLL_PD BIT(1) +#define SCTL_SCPLLCTL_FRAC_CONTROL BIT(3) +#define SCTL_SCPLLCTL_STRB_BYPASS BIT(6) +#define SCTL_SCPLLCTL_STRB_INPUT BIT(8) + #define SCTL_SCPLLFCTRL 0x0c /* PLL frequency control register */ + +#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK 0xff +#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT 10 +#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK 7 +#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT 21 +#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK 7 +#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT 18 +#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK 0x03 +#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT 4 + + #define SCTL_SCRESFRACT 0x10 /* PLL fractional input register */ + +#define SCTL_SCRESFRACT_MASK 0x0000ffff + + #define SCTL_SCRESCTRL1 0x14 /* Peripheral reset control 1 */ #define SCTL_SCRESXTRL2 0x18 /* Peripheral reset control 2 */ #define SCTL_SCPEREN0 0x1c /* Peripheral clock enable register 0 */ -- cgit v1.2.3 From bcf58e725ddc45d31addbc6627d4f0edccc824c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 04:02:49 -0700 Subject: userns: Make create_new_namespaces take a user_ns parameter Modify create_new_namespaces to explicitly take a user namespace parameter, instead of implicitly through the task_struct. This allows an implementation of unshare(CLONE_NEWUSER) where the new user namespace is not stored onto the current task_struct until after all of the namespaces are created. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/ipc_namespace.h | 7 ++++--- include/linux/utsname.h | 6 +++--- ipc/namespace.c | 10 ++++------ kernel/nsproxy.c | 22 +++++++++++++--------- kernel/utsname.c | 9 ++++----- 5 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 5499c92a9153..f03af702a39d 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -133,7 +133,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #if defined(CONFIG_IPC_NS) extern struct ipc_namespace *copy_ipcs(unsigned long flags, - struct task_struct *tsk); + struct user_namespace *user_ns, struct ipc_namespace *ns); + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) @@ -144,12 +145,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) return ERR_PTR(-EINVAL); - return tsk->nsproxy->ipc_ns; + return ns; } static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b345206722a..221f4a0a7502 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -33,7 +33,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) } extern struct uts_namespace *copy_utsname(unsigned long flags, - struct task_struct *tsk); + struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct kref *kref); static inline void put_uts_ns(struct uts_namespace *ns) @@ -50,12 +50,12 @@ static inline void put_uts_ns(struct uts_namespace *ns) } static inline struct uts_namespace *copy_utsname(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct uts_namespace *old_ns) { if (flags & CLONE_NEWUTS) return ERR_PTR(-EINVAL); - return tsk->nsproxy->uts_ns; + return old_ns; } #endif diff --git a/ipc/namespace.c b/ipc/namespace.c index 6ed33c05cb66..72c868277793 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -16,7 +16,7 @@ #include "util.h" -static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, +static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, struct ipc_namespace *old_ns) { struct ipc_namespace *ns; @@ -46,19 +46,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, ipcns_notify(IPCNS_CREATED); register_ipcns_notifier(ns); - ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); + ns->user_ns = get_user_ns(user_ns); return ns; } struct ipc_namespace *copy_ipcs(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct ipc_namespace *ns) { - struct ipc_namespace *ns = tsk->nsproxy->ipc_ns; - if (!(flags & CLONE_NEWIPC)) return get_ipc_ns(ns); - return create_ipc_ns(tsk, ns); + return create_ipc_ns(user_ns, ns); } /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 4357a0a7d17d..2ddd81657a2a 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void) * leave it to the caller to do proper locking and attach it to task. */ static struct nsproxy *create_new_namespaces(unsigned long flags, - struct task_struct *tsk, struct fs_struct *new_fs) + struct task_struct *tsk, struct user_namespace *user_ns, + struct fs_struct *new_fs) { struct nsproxy *new_nsp; int err; @@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, if (!new_nsp) return ERR_PTR(-ENOMEM); - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs); + new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); if (IS_ERR(new_nsp->mnt_ns)) { err = PTR_ERR(new_nsp->mnt_ns); goto out_ns; } - new_nsp->uts_ns = copy_utsname(flags, tsk); + new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns); if (IS_ERR(new_nsp->uts_ns)) { err = PTR_ERR(new_nsp->uts_ns); goto out_uts; } - new_nsp->ipc_ns = copy_ipcs(flags, tsk); + new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns); if (IS_ERR(new_nsp->ipc_ns)) { err = PTR_ERR(new_nsp->ipc_ns); goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->pid_ns); + new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); if (IS_ERR(new_nsp->pid_ns)) { err = PTR_ERR(new_nsp->pid_ns); goto out_pid; } - new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); + new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); goto out_net; @@ -152,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) goto out; } - new_ns = create_new_namespaces(flags, tsk, tsk->fs); + new_ns = create_new_namespaces(flags, tsk, + task_cred_xxx(tsk, user_ns), tsk->fs); if (IS_ERR(new_ns)) { err = PTR_ERR(new_ns); goto out; @@ -186,6 +188,7 @@ void free_nsproxy(struct nsproxy *ns) int unshare_nsproxy_namespaces(unsigned long unshare_flags, struct nsproxy **new_nsp, struct fs_struct *new_fs) { + struct user_namespace *user_ns; int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | @@ -195,7 +198,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, if (!nsown_capable(CAP_SYS_ADMIN)) return -EPERM; - *new_nsp = create_new_namespaces(unshare_flags, current, + user_ns = current_user_ns(); + *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, new_fs ? new_fs : current->fs); if (IS_ERR(*new_nsp)) { err = PTR_ERR(*new_nsp); @@ -252,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) if (nstype && (ops->type != nstype)) goto out; - new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); + new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); if (IS_ERR(new_nsproxy)) { err = PTR_ERR(new_nsproxy); goto out; diff --git a/kernel/utsname.c b/kernel/utsname.c index 4a9362f9325d..fdc619eb61ef 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -32,7 +32,7 @@ static struct uts_namespace *create_uts_ns(void) * @old_ns: namespace to clone * Return NULL on error (failure to kmalloc), new ns otherwise */ -static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, +static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, struct uts_namespace *old_ns) { struct uts_namespace *ns; @@ -43,7 +43,7 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); - ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); + ns->user_ns = get_user_ns(user_ns); up_read(&uts_sem); return ns; } @@ -55,9 +55,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, * versa. */ struct uts_namespace *copy_utsname(unsigned long flags, - struct task_struct *tsk) + struct user_namespace *user_ns, struct uts_namespace *old_ns) { - struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; struct uts_namespace *new_ns; BUG_ON(!old_ns); @@ -66,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, if (!(flags & CLONE_NEWUTS)) return old_ns; - new_ns = clone_uts_ns(tsk, old_ns); + new_ns = clone_uts_ns(user_ns, old_ns); put_uts_ns(old_ns); return new_ns; -- cgit v1.2.3 From 4c44aaafa8108f584831850ab48a975e971db2de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 05:05:21 -0700 Subject: userns: Kill task_user_ns The task_user_ns function hides the fact that it is getting the user namespace from struct cred on the task. struct cred may go away as soon as the rcu lock is released. This leads to a race where we can dereference a stale user namespace pointer. To make it obvious a struct cred is involved kill task_user_ns. To kill the race modify the users of task_user_ns to only reference the user namespace while the rcu lock is held. Cc: Kees Cook Cc: James Morris Acked-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/cred.h | 2 -- kernel/ptrace.c | 10 ++++++++-- kernel/sched/core.c | 10 ++++++++-- security/yama/yama_lsm.c | 12 +++++++++--- 4 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce6637..856d2622d832 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -357,10 +357,8 @@ static inline void put_cred(const struct cred *_cred) extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS #define current_user_ns() (current_cred_xxx(user_ns)) -#define task_user_ns(task) (task_cred_xxx((task), user_ns)) #else #define current_user_ns() (&init_user_ns) -#define task_user_ns(task) (&init_user_ns) #endif diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f5e55dda955..7b09b88862cc 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -215,8 +215,12 @@ ok: smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); - if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) + rcu_read_lock(); + if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); return security_ptrace_access_check(task, mode); } @@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) + rcu_read_lock(); + if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) flags |= PT_PTRACE_CAP; + rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d8927fda712..2f5eb1838b3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4029,8 +4029,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) goto out_free_cpus_allowed; } retval = -EPERM; - if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) - goto out_unlock; + if (!check_same_owner(p)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + } retval = security_task_setscheduler(p); if (retval) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index b4c29848b49d..0e72239aeb05 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -262,14 +262,18 @@ int yama_ptrace_access_check(struct task_struct *child, /* No additional restrictions. */ break; case YAMA_SCOPE_RELATIONAL: + rcu_read_lock(); if (!task_is_descendant(current, child) && !ptracer_exception_found(current, child) && - !ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) + !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; + rcu_read_unlock(); break; case YAMA_SCOPE_CAPABILITY: - if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) + rcu_read_lock(); + if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; + rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: default: @@ -307,8 +311,10 @@ int yama_ptrace_traceme(struct task_struct *parent) /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: - if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE)) + rcu_read_lock(); + if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; + rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; -- cgit v1.2.3 From cde1975bc242f3e1072bde623ef378e547b73f91 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 06:24:06 -0700 Subject: userns: Implent proc namespace operations This allows entering a user namespace, and the ability to store a reference to a user namespace with a bind mount. Addition of missing userns_ns_put in userns_install from Gao feng Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- fs/proc/namespaces.c | 4 +++ include/linux/proc_fs.h | 1 + kernel/user_namespace.c | 90 +++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 78 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 2a17fd9ae6a9..030250c27d70 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "internal.h" @@ -26,6 +27,9 @@ static const struct proc_ns_operations *ns_entries[] = { #endif #ifdef CONFIG_PID_NS &pidns_operations, +#endif +#ifdef CONFIG_USER_NS + &userns_operations, #endif &mntns_operations, }; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9014c041e752..31447819bc55 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -258,6 +258,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; union proc_op { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 49096d559e08..a9460774e77d 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly; static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); +static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + /* Start with the same capabilities as init but useless for doing + * anything as the capabilities are bound to the new user namespace. + */ + cred->securebits = SECUREBITS_DEFAULT; + cred->cap_inheritable = CAP_EMPTY_SET; + cred->cap_permitted = CAP_FULL_SET; + cred->cap_effective = CAP_FULL_SET; + cred->cap_bset = CAP_FULL_SET; +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + cred->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ + cred->user_ns = user_ns; +} + /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the @@ -53,27 +72,12 @@ int create_user_ns(struct cred *new) return -ENOMEM; kref_init(&ns->kref); + /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; ns->owner = owner; ns->group = group; - /* Start with the same capabilities as init but useless for doing - * anything as the capabilities are bound to the new user namespace. - */ - new->securebits = SECUREBITS_DEFAULT; - new->cap_inheritable = CAP_EMPTY_SET; - new->cap_permitted = CAP_FULL_SET; - new->cap_effective = CAP_FULL_SET; - new->cap_bset = CAP_FULL_SET; -#ifdef CONFIG_KEYS - key_put(new->request_key_auth); - new->request_key_auth = NULL; -#endif - /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - - /* Leave the new->user_ns reference with the new user namespace. */ - /* Leave the reference to our user_ns with the new cred. */ - new->user_ns = ns; + set_cred_user_ns(new, ns); return 0; } @@ -737,6 +741,58 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, return false; } +static void *userns_get(struct task_struct *task) +{ + struct user_namespace *user_ns; + + rcu_read_lock(); + user_ns = get_user_ns(__task_cred(task)->user_ns); + rcu_read_unlock(); + + return user_ns; +} + +static void userns_put(void *ns) +{ + put_user_ns(ns); +} + +static int userns_install(struct nsproxy *nsproxy, void *ns) +{ + struct user_namespace *user_ns = ns; + struct cred *cred; + + /* Don't allow gaining capabilities by reentering + * the same user namespace. + */ + if (user_ns == current_user_ns()) + return -EINVAL; + + /* Threaded many not enter a different user namespace */ + if (atomic_read(¤t->mm->mm_users) > 1) + return -EINVAL; + + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cred = prepare_creds(); + if (!cred) + return -ENOMEM; + + put_user_ns(cred->user_ns); + set_cred_user_ns(cred, get_user_ns(user_ns)); + + return commit_creds(cred); +} + +const struct proc_ns_operations userns_operations = { + .name = "user", + .type = CLONE_NEWUSER, + .get = userns_get, + .put = userns_put, + .install = userns_install, +}; + static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); -- cgit v1.2.3 From b2e0d98705e60e45bbb3c0032c48824ad7ae0704 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Jul 2012 05:15:35 -0700 Subject: userns: Implement unshare of the user namespace - Add CLONE_THREAD to the unshare flags if CLONE_NEWUSER is selected As changing user namespaces is only valid if all there is only a single thread. - Restore the code to add CLONE_VM if CLONE_THREAD is selected and the code to addCLONE_SIGHAND if CLONE_VM is selected. Making the constraints in the code clear. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- include/linux/nsproxy.h | 2 +- include/linux/user_namespace.h | 9 +++++++++ kernel/fork.c | 25 ++++++++++++++++++++++--- kernel/nsproxy.c | 8 ++++---- kernel/user_namespace.c | 15 +++++++++++++++ 5 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cc37a55ad004..10e5947491c7 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, - struct fs_struct *); + struct cred *, struct fs_struct *); int __init nsproxy_cache_init(void); static inline void put_nsproxy(struct nsproxy *ns) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 95142cae446a..17651f08d67f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -39,6 +39,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) } extern int create_user_ns(struct cred *new); +extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -66,6 +67,14 @@ static inline int create_user_ns(struct cred *new) return -EINVAL; } +static inline int unshare_userns(unsigned long unshare_flags, + struct cred **new_cred) +{ + if (unshare_flags & CLONE_NEWUSER) + return -EINVAL; + return 0; +} + static inline void put_user_ns(struct user_namespace *ns) { } diff --git a/kernel/fork.c b/kernel/fork.c index 8c29abb19014..38e53b87402c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1687,7 +1687,7 @@ static int check_unshare_flags(unsigned long unshare_flags) if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| - CLONE_NEWPID)) + CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* * Not implemented, but pretend it works if there is nothing to @@ -1754,10 +1754,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *fd, *new_fd = NULL; + struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; int err; + /* + * If unsharing a user namespace must also unshare the thread. + */ + if (unshare_flags & CLONE_NEWUSER) + unshare_flags |= CLONE_THREAD; /* * If unsharing a pid namespace must also unshare the thread. */ @@ -1795,11 +1801,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) err = unshare_fd(unshare_flags, &new_fd); if (err) goto bad_unshare_cleanup_fs; - err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); + err = unshare_userns(unshare_flags, &new_cred); if (err) goto bad_unshare_cleanup_fd; + err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, + new_cred, new_fs); + if (err) + goto bad_unshare_cleanup_cred; - if (new_fs || new_fd || do_sysvsem || new_nsproxy) { + if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). @@ -1832,11 +1842,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } task_unlock(current); + + if (new_cred) { + /* Install the new user namespace */ + commit_creds(new_cred); + new_cred = NULL; + } } if (new_nsproxy) put_nsproxy(new_nsproxy); +bad_unshare_cleanup_cred: + if (new_cred) + put_cred(new_cred); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 2ddd81657a2a..78e2ecb20165 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -186,7 +186,7 @@ void free_nsproxy(struct nsproxy *ns) * On success, returns the new nsproxy. */ int unshare_nsproxy_namespaces(unsigned long unshare_flags, - struct nsproxy **new_nsp, struct fs_struct *new_fs) + struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) { struct user_namespace *user_ns; int err = 0; @@ -195,12 +195,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, CLONE_NEWNET | CLONE_NEWPID))) return 0; - if (!nsown_capable(CAP_SYS_ADMIN)) + user_ns = new_cred ? new_cred->user_ns : current_user_ns(); + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; - user_ns = current_user_ns(); *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, - new_fs ? new_fs : current->fs); + new_fs ? new_fs : current->fs); if (IS_ERR(*new_nsp)) { err = PTR_ERR(*new_nsp); goto out; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a9460774e77d..ce92f7e6290a 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -82,6 +82,21 @@ int create_user_ns(struct cred *new) return 0; } +int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) +{ + struct cred *cred; + + if (!(unshare_flags & CLONE_NEWUSER)) + return 0; + + cred = prepare_creds(); + if (!cred) + return -ENOMEM; + + *new_cred = cred; + return create_user_ns(cred); +} + void free_user_ns(struct kref *kref) { struct user_namespace *parent, *ns = -- cgit v1.2.3 From 33d6dce607573b5fd7a43168e0d91221b3ca532b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 17 Jun 2011 13:33:20 -0700 Subject: proc: Generalize proc inode allocation Generalize the proc inode allocation so that it can be used without having to having to create a proc_dir_entry. This will allow namespace file descriptors to remain light weight entitities but still have the same inode number when the backing namespace is the same. Acked-by: Serge E. Hallyn Signed-off-by: Eric W. Biederman --- fs/proc/generic.c | 26 +++++++++++++------------- include/linux/proc_fs.h | 10 ++++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0d80cef4cfb9..7b3ae3cc0ef9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. */ -static unsigned int get_inode_number(void) +int proc_alloc_inum(unsigned int *inum) { unsigned int i; int error; retry: - if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) - return 0; + if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) + return -ENOMEM; spin_lock(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); @@ -365,18 +365,19 @@ retry: if (error == -EAGAIN) goto retry; else if (error) - return 0; + return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, i); spin_unlock(&proc_inum_lock); - return 0; + return -ENOSPC; } - return PROC_DYNAMIC_FIRST + i; + *inum = PROC_DYNAMIC_FIRST + i; + return 0; } -static void release_inode_number(unsigned int inum) +void proc_free_inum(unsigned int inum) { spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); @@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - unsigned int i; struct proc_dir_entry *tmp; + int ret; - i = get_inode_number(); - if (i == 0) - return -EAGAIN; - dp->low_ino = i; + ret = proc_alloc_inum(&dp->low_ino); + if (ret) + return ret; if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { @@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data); static void free_proc_entry(struct proc_dir_entry *de) { - release_inode_number(de->low_ino); + proc_free_inum(de->low_ino); if (S_ISLNK(de->mode)) kfree(de->data); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 31447819bc55..bf1d000fbba6 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -176,6 +176,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, extern struct file *proc_ns_fget(int fd); extern bool proc_ns_inode(struct inode *inode); +extern int proc_alloc_inum(unsigned int *pino); +extern void proc_free_inum(unsigned int inum); #else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) @@ -235,6 +237,14 @@ static inline bool proc_ns_inode(struct inode *inode) return false; } +static inline int proc_alloc_inum(unsigned int *inum) +{ + *inum = 1; + return 0; +} +static inline void proc_free_inum(unsigned int inum) +{ +} #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) -- cgit v1.2.3 From 98f842e675f96ffac96e6c50315790912b2812be Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 Jun 2011 10:21:48 -0700 Subject: proc: Usable inode numbers for the namespace file descriptors. Assign a unique proc inode to each namespace, and use that inode number to ensure we only allocate at most one proc inode for every namespace in proc. A single proc inode per namespace allows userspace to test to see if two processes are in the same namespace. This has been a long requested feature and only blocked because a naive implementation would put the id in a global space and would ultimately require having a namespace for the names of namespaces, making migration and certain virtualization tricks impossible. We still don't have per superblock inode numbers for proc, which appears necessary for application unaware checkpoint/restart and migrations (if the application is using namespace file descriptors) but that is now allowd by the design if it becomes important. I have preallocated the ipc and uts initial proc inode numbers so their structures can be statically initialized. Signed-off-by: Eric W. Biederman --- fs/mount.h | 1 + fs/namespace.c | 14 ++++++++++++++ fs/proc/namespaces.c | 24 ++++++++++++++---------- include/linux/ipc_namespace.h | 2 ++ include/linux/pid_namespace.h | 1 + include/linux/proc_fs.h | 7 ++++++- include/linux/user_namespace.h | 1 + include/linux/utsname.h | 1 + include/net/net_namespace.h | 2 ++ init/version.c | 2 ++ ipc/msgutil.c | 2 ++ ipc/namespace.c | 16 ++++++++++++++++ kernel/pid.c | 1 + kernel/pid_namespace.c | 12 ++++++++++++ kernel/user.c | 2 ++ kernel/user_namespace.c | 15 +++++++++++++++ kernel/utsname.c | 17 ++++++++++++++++- net/core/net_namespace.c | 24 ++++++++++++++++++++++++ 18 files changed, 132 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index 630fafc616bb..cd5007980400 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,6 +4,7 @@ struct mnt_namespace { atomic_t count; + unsigned int proc_inum; struct mount * root; struct list_head list; struct user_namespace *user_ns; diff --git a/fs/namespace.c b/fs/namespace.c index cab78a74aca3..c1bbe86f4920 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2301,6 +2301,7 @@ dput_out: static void free_mnt_ns(struct mnt_namespace *ns) { + proc_free_inum(ns->proc_inum); put_user_ns(ns->user_ns); kfree(ns); } @@ -2317,10 +2318,16 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) { struct mnt_namespace *new_ns; + int ret; new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); + ret = proc_alloc_inum(&new_ns->proc_inum); + if (ret) { + kfree(new_ns); + return ERR_PTR(ret); + } new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); new_ns->root = NULL; @@ -2799,10 +2806,17 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int mntns_inum(void *ns) +{ + struct mnt_namespace *mnt_ns = ns; + return mnt_ns->proc_inum; +} + const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, + .inum = mntns_inum, }; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 7a6d8d69cdb8..b7a47196c8c3 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -82,7 +82,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, return ERR_PTR(-ENOMEM); } - inode = new_inode(sb); + inode = iget_locked(sb, ns_ops->inum(ns)); if (!inode) { dput(dentry); ns_ops->put(ns); @@ -90,13 +90,17 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, } ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &ns_inode_operations; - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } d_set_d_op(dentry, &ns_dentry_operations); result = d_instantiate_unique(dentry, inode); @@ -162,12 +166,12 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ns) goto out_put_task; - snprintf(name, sizeof(name), "%s", ns_ops->name); + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); len = strlen(name); if (len > buflen) len = buflen; - if (copy_to_user(buffer, ns_ops->name, len)) + if (copy_to_user(buffer, name, len)) len = -EFAULT; ns_ops->put(ns); diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index f03af702a39d..fe771978e877 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -67,6 +67,8 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + + unsigned int proc_inum; }; extern struct ipc_namespace init_ipc_ns; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 4c96acdb2489..bf285999273a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -37,6 +37,7 @@ struct pid_namespace { kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ + unsigned int proc_inum; }; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index bf1d000fbba6..2e24018b7cec 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,7 +28,11 @@ struct mm_struct; */ enum { - PROC_ROOT_INO = 1, + PROC_ROOT_INO = 1, + PROC_IPC_INIT_INO = 0xEFFFFFFFU, + PROC_UTS_INIT_INO = 0xEFFFFFFEU, + PROC_USER_INIT_INO = 0xEFFFFFFDU, + PROC_PID_INIT_INO = 0xEFFFFFFCU, }; /* @@ -263,6 +267,7 @@ struct proc_ns_operations { void *(*get)(struct task_struct *task); void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); + unsigned int (*inum)(void *ns); }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 17651f08d67f..b9bd2e6c73cc 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -25,6 +25,7 @@ struct user_namespace { struct user_namespace *parent; kuid_t owner; kgid_t group; + unsigned int proc_inum; }; extern struct user_namespace init_user_ns; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 221f4a0a7502..239e27733d6c 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -23,6 +23,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + unsigned int proc_inum; }; extern struct uts_namespace init_uts_ns; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index c5a43f56b796..de644bcd8613 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -56,6 +56,8 @@ struct net { struct user_namespace *user_ns; /* Owning user namespace */ + unsigned int proc_inum; + struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; diff --git a/init/version.c b/init/version.c index 86fe0ccb997a..58170f18912d 100644 --- a/init/version.c +++ b/init/version.c @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_KALLSYMS #define version(a) Version_ ## a @@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = { .domainname = UTS_DOMAINNAME, }, .user_ns = &init_user_ns, + .proc_inum = PROC_UTS_INIT_INO, }; EXPORT_SYMBOL_GPL(init_uts_ns); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 26143d377c95..6471f1bdae96 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "util.h" @@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock); struct ipc_namespace init_ipc_ns = { .count = ATOMIC_INIT(1), .user_ns = &init_user_ns, + .proc_inum = PROC_IPC_INIT_INO, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); diff --git a/ipc/namespace.c b/ipc/namespace.c index 72c868277793..cf3386a51de2 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (ns == NULL) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + atomic_set(&ns->count, 1); err = mq_init_ns(ns); if (err) { + proc_free_inum(ns->proc_inum); kfree(ns); return ERR_PTR(err); } @@ -111,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ ipcns_notify(IPCNS_REMOVED); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -172,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new) return 0; } +static unsigned int ipcns_inum(void *vp) +{ + struct ipc_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations ipcns_operations = { .name = "ipc", .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, + .inum = ipcns_inum, }; diff --git a/kernel/pid.c b/kernel/pid.c index 6e8da291de49..3026ddae0a34 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -80,6 +80,7 @@ struct pid_namespace init_pid_ns = { .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, + .proc_inum = PROC_PID_INIT_INO, }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 68508d330634..560da0dab230 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -107,6 +107,10 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns if (ns->pid_cachep == NULL) goto out_free_map; + err = proc_alloc_inum(&ns->proc_inum); + if (err) + goto out_free_map; + kref_init(&ns->kref); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); @@ -133,6 +137,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns) { int i; + proc_free_inum(ns->proc_inum); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); put_user_ns(ns->user_ns); @@ -345,12 +350,19 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int pidns_inum(void *ns) +{ + struct pid_namespace *pid_ns = ns; + return pid_ns->proc_inum; +} + const struct proc_ns_operations pidns_operations = { .name = "pid", .type = CLONE_NEWPID, .get = pidns_get, .put = pidns_put, .install = pidns_install, + .inum = pidns_inum, }; static __init int pid_namespaces_init(void) diff --git a/kernel/user.c b/kernel/user.c index 750acffbe9ec..33acb5e53a5f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * userns count is 1 for root user, 1 for init_uts_ns, @@ -51,6 +52,7 @@ struct user_namespace init_user_ns = { }, .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, + .proc_inum = PROC_USER_INIT_INO, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 89f6eaed067a..f5975ccf9348 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -58,6 +58,7 @@ int create_user_ns(struct cred *new) struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; + int ret; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who @@ -71,6 +72,12 @@ int create_user_ns(struct cred *new) if (!ns) return -ENOMEM; + ret = proc_alloc_inum(&ns->proc_inum); + if (ret) { + kmem_cache_free(user_ns_cachep, ns); + return ret; + } + kref_init(&ns->kref); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; @@ -103,6 +110,7 @@ void free_user_ns(struct kref *kref) container_of(kref, struct user_namespace, kref); parent = ns->parent; + proc_free_inum(ns->proc_inum); kmem_cache_free(user_ns_cachep, ns); put_user_ns(parent); } @@ -808,12 +816,19 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) return commit_creds(cred); } +static unsigned int userns_inum(void *ns) +{ + struct user_namespace *user_ns = ns; + return user_ns->proc_inum; +} + const struct proc_ns_operations userns_operations = { .name = "user", .type = CLONE_NEWUSER, .get = userns_get, .put = userns_put, .install = userns_install, + .inum = userns_inum, }; static __init int user_namespaces_init(void) diff --git a/kernel/utsname.c b/kernel/utsname.c index fdc619eb61ef..f6336d51d64c 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, struct uts_namespace *old_ns) { struct uts_namespace *ns; + int err; ns = create_uts_ns(); if (!ns) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(user_ns); @@ -77,6 +84,7 @@ void free_uts_ns(struct kref *kref) ns = container_of(kref, struct uts_namespace, kref); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -114,11 +122,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *new) return 0; } +static unsigned int utsns_inum(void *vp) +{ + struct uts_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations utsns_operations = { .name = "uts", .type = CLONE_NEWUTS, .get = utsns_get, .put = utsns_put, .install = utsns_install, + .inum = utsns_inum, }; - diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ec2870b44c1f..2e9a3132b8dd 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +static __net_init int net_ns_net_init(struct net *net) +{ + return proc_alloc_inum(&net->proc_inum); +} + +static __net_exit void net_ns_net_exit(struct net *net) +{ + proc_free_inum(net->proc_inum); +} + +static struct pernet_operations __net_initdata net_ns_ops = { + .init = net_ns_net_init, + .exit = net_ns_net_exit, +}; + static int __init net_ns_init(void) { struct net_generic *ng; @@ -412,6 +427,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); + register_pernet_subsys(&net_ns_ops); + return 0; } @@ -640,11 +657,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int netns_inum(void *ns) +{ + struct net *net = ns; + return net->proc_inum; +} + const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, + .inum = netns_inum, }; #endif -- cgit v1.2.3 From 97fa4cf442ff2872000d9110686371775795a32b Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 17 Nov 2012 15:22:22 +0100 Subject: clk: mvebu: add mvebu core clocks. This driver allows to provide DT clocks for core clocks found on Marvell Kirkwood, Dove & 370/XP SoCs. The core clock frequencies and ratios are determined by decoding the Sample-At-Reset registers. Although technically correct, using a divider of 0 will lead to div_by_zero panic. Let's use a ratio of 0/1 instead to fail later with a zero clock. Signed-off-by: Gregory CLEMENT Signed-off-by: Sebastian Hesselbarth Signed-off-by: Andrew Lunn Tested-by Gregory CLEMENT --- .../devicetree/bindings/clock/mvebu-core-clock.txt | 47 ++ drivers/clk/Kconfig | 2 + drivers/clk/Makefile | 1 + drivers/clk/mvebu/Kconfig | 3 + drivers/clk/mvebu/Makefile | 1 + drivers/clk/mvebu/clk-core.c | 675 +++++++++++++++++++++ drivers/clk/mvebu/clk-core.h | 18 + drivers/clk/mvebu/clk.c | 23 + include/linux/clk/mvebu.h | 22 + 9 files changed, 792 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/mvebu-core-clock.txt create mode 100644 drivers/clk/mvebu/Kconfig create mode 100644 drivers/clk/mvebu/Makefile create mode 100644 drivers/clk/mvebu/clk-core.c create mode 100644 drivers/clk/mvebu/clk-core.h create mode 100644 drivers/clk/mvebu/clk.c create mode 100644 include/linux/clk/mvebu.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt new file mode 100644 index 000000000000..1e662948661e --- /dev/null +++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt @@ -0,0 +1,47 @@ +* Core Clock bindings for Marvell MVEBU SoCs + +Marvell MVEBU SoCs usually allow to determine core clock frequencies by +reading the Sample-At-Reset (SAR) register. The core clock consumer should +specify the desired clock by having the clock ID in its "clocks" phandle cell. + +The following is a list of provided IDs and clock names on Armada 370/XP: + 0 = tclk (Internal Bus clock) + 1 = cpuclk (CPU clock) + 2 = nbclk (L2 Cache clock) + 3 = hclk (DRAM control clock) + 4 = dramclk (DDR clock) + +The following is a list of provided IDs and clock names on Kirkwood and Dove: + 0 = tclk (Internal Bus clock) + 1 = cpuclk (CPU0 clock) + 2 = l2clk (L2 Cache clock derived from CPU0 clock) + 3 = ddrclk (DDR controller clock derived from CPU0 clock) + +Required properties: +- compatible : shall be one of the following: + "marvell,armada-370-core-clock" - For Armada 370 SoC core clocks + "marvell,armada-xp-core-clock" - For Armada XP SoC core clocks + "marvell,dove-core-clock" - for Dove SoC core clocks + "marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180) + "marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC +- reg : shall be the register address of the Sample-At-Reset (SAR) register +- #clock-cells : from common clock binding; shall be set to 1 + +Optional properties: +- clock-output-names : from common clock binding; allows overwrite default clock + output names ("tclk", "cpuclk", "l2clk", "ddrclk") + +Example: + +core_clk: core-clocks@d0214 { + compatible = "marvell,dove-core-clock"; + reg = <0xd0214 0x4>; + #clock-cells = <1>; +}; + +spi0: spi@10600 { + compatible = "marvell,orion-spi"; + /* ... */ + /* get tclk from core clock provider */ + clocks = <&core_clk 0>; +}; diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index bace9e98f75d..60427c0d23e6 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -54,3 +54,5 @@ config COMMON_CLK_MAX77686 This driver supports Maxim 77686 crystal oscillator clock. endmenu + +source "drivers/clk/mvebu/Kconfig" diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 71a25b91de00..d0a14ae8d49c 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_PLAT_SPEAR) += spear/ obj-$(CONFIG_ARCH_U300) += clk-u300.o obj-$(CONFIG_COMMON_CLK_VERSATILE) += versatile/ obj-$(CONFIG_ARCH_PRIMA2) += clk-prima2.o +obj-$(CONFIG_PLAT_ORION) += mvebu/ ifeq ($(CONFIG_COMMON_CLK), y) obj-$(CONFIG_ARCH_MMP) += mmp/ endif diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig new file mode 100644 index 000000000000..fd7bf97ff74d --- /dev/null +++ b/drivers/clk/mvebu/Kconfig @@ -0,0 +1,3 @@ +config MVEBU_CLK_CORE + bool + diff --git a/drivers/clk/mvebu/Makefile b/drivers/clk/mvebu/Makefile new file mode 100644 index 000000000000..de1d9617f75a --- /dev/null +++ b/drivers/clk/mvebu/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MVEBU_CLK_CORE) += clk.o clk-core.o diff --git a/drivers/clk/mvebu/clk-core.c b/drivers/clk/mvebu/clk-core.c new file mode 100644 index 000000000000..69056a7479e8 --- /dev/null +++ b/drivers/clk/mvebu/clk-core.c @@ -0,0 +1,675 @@ +/* + * Marvell EBU clock core handling defined at reset + * + * Copyright (C) 2012 Marvell + * + * Gregory CLEMENT + * Sebastian Hesselbarth + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#include +#include +#include +#include +#include +#include +#include +#include "clk-core.h" + +struct core_ratio { + int id; + const char *name; +}; + +struct core_clocks { + u32 (*get_tclk_freq)(void __iomem *sar); + u32 (*get_cpu_freq)(void __iomem *sar); + void (*get_clk_ratio)(void __iomem *sar, int id, int *mult, int *div); + const struct core_ratio *ratios; + int num_ratios; +}; + +static struct clk_onecell_data clk_data; + +static void __init mvebu_clk_core_setup(struct device_node *np, + struct core_clocks *coreclk) +{ + const char *tclk_name = "tclk"; + const char *cpuclk_name = "cpuclk"; + void __iomem *base; + unsigned long rate; + int n; + + base = of_iomap(np, 0); + if (WARN_ON(!base)) + return; + + /* + * Allocate struct for TCLK, cpu clk, and core ratio clocks + */ + clk_data.clk_num = 2 + coreclk->num_ratios; + clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *), + GFP_KERNEL); + if (WARN_ON(!clk_data.clks)) + return; + + /* + * Register TCLK + */ + of_property_read_string_index(np, "clock-output-names", 0, + &tclk_name); + rate = coreclk->get_tclk_freq(base); + clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL, + CLK_IS_ROOT, rate); + WARN_ON(IS_ERR(clk_data.clks[0])); + + /* + * Register CPU clock + */ + of_property_read_string_index(np, "clock-output-names", 1, + &cpuclk_name); + rate = coreclk->get_cpu_freq(base); + clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL, + CLK_IS_ROOT, rate); + WARN_ON(IS_ERR(clk_data.clks[1])); + + /* + * Register fixed-factor clocks derived from CPU clock + */ + for (n = 0; n < coreclk->num_ratios; n++) { + const char *rclk_name = coreclk->ratios[n].name; + int mult, div; + + of_property_read_string_index(np, "clock-output-names", + 2+n, &rclk_name); + coreclk->get_clk_ratio(base, coreclk->ratios[n].id, + &mult, &div); + clk_data.clks[2+n] = clk_register_fixed_factor(NULL, rclk_name, + cpuclk_name, 0, mult, div); + WARN_ON(IS_ERR(clk_data.clks[2+n])); + }; + + /* + * SAR register isn't needed anymore + */ + iounmap(base); + + of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data); +} + +#ifdef CONFIG_MACH_ARMADA_370_XP +/* + * Armada 370/XP Sample At Reset is a 64 bit bitfiled split in two + * register of 32 bits + */ + +#define SARL 0 /* Low part [0:31] */ +#define SARL_AXP_PCLK_FREQ_OPT 21 +#define SARL_AXP_PCLK_FREQ_OPT_MASK 0x7 +#define SARL_A370_PCLK_FREQ_OPT 11 +#define SARL_A370_PCLK_FREQ_OPT_MASK 0xF +#define SARL_AXP_FAB_FREQ_OPT 24 +#define SARL_AXP_FAB_FREQ_OPT_MASK 0xF +#define SARL_A370_FAB_FREQ_OPT 15 +#define SARL_A370_FAB_FREQ_OPT_MASK 0x1F +#define SARL_A370_TCLK_FREQ_OPT 20 +#define SARL_A370_TCLK_FREQ_OPT_MASK 0x1 +#define SARH 4 /* High part [32:63] */ +#define SARH_AXP_PCLK_FREQ_OPT (52-32) +#define SARH_AXP_PCLK_FREQ_OPT_MASK 0x1 +#define SARH_AXP_PCLK_FREQ_OPT_SHIFT 3 +#define SARH_AXP_FAB_FREQ_OPT (51-32) +#define SARH_AXP_FAB_FREQ_OPT_MASK 0x1 +#define SARH_AXP_FAB_FREQ_OPT_SHIFT 4 + +static const u32 __initconst armada_370_tclk_frequencies[] = { + 16600000, + 20000000, +}; + +static u32 __init armada_370_get_tclk_freq(void __iomem *sar) +{ + u8 tclk_freq_select = 0; + + tclk_freq_select = ((readl(sar) >> SARL_A370_TCLK_FREQ_OPT) & + SARL_A370_TCLK_FREQ_OPT_MASK); + return armada_370_tclk_frequencies[tclk_freq_select]; +} + +static const u32 __initconst armada_370_cpu_frequencies[] = { + 400000000, + 533000000, + 667000000, + 800000000, + 1000000000, + 1067000000, + 1200000000, +}; + +static u32 __init armada_370_get_cpu_freq(void __iomem *sar) +{ + u32 cpu_freq; + u8 cpu_freq_select = 0; + + cpu_freq_select = ((readl(sar) >> SARL_A370_PCLK_FREQ_OPT) & + SARL_A370_PCLK_FREQ_OPT_MASK); + if (cpu_freq_select > ARRAY_SIZE(armada_370_cpu_frequencies)) { + pr_err("CPU freq select unsuported %d\n", cpu_freq_select); + cpu_freq = 0; + } else + cpu_freq = armada_370_cpu_frequencies[cpu_freq_select]; + + return cpu_freq; +} + +enum { A370_XP_NBCLK, A370_XP_HCLK, A370_XP_DRAMCLK }; + +static const struct core_ratio __initconst armada_370_xp_core_ratios[] = { + { .id = A370_XP_NBCLK, .name = "nbclk" }, + { .id = A370_XP_HCLK, .name = "hclk" }, + { .id = A370_XP_DRAMCLK, .name = "dramclk" }, +}; + +static const int __initconst armada_370_xp_nbclk_ratios[32][2] = { + {0, 1}, {1, 2}, {2, 2}, {2, 2}, + {1, 2}, {1, 2}, {1, 1}, {2, 3}, + {0, 1}, {1, 2}, {2, 4}, {0, 1}, + {1, 2}, {0, 1}, {0, 1}, {2, 2}, + {0, 1}, {0, 1}, {0, 1}, {1, 1}, + {2, 3}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {1, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, +}; + +static const int __initconst armada_370_xp_hclk_ratios[32][2] = { + {0, 1}, {1, 2}, {2, 6}, {2, 3}, + {1, 3}, {1, 4}, {1, 2}, {2, 6}, + {0, 1}, {1, 6}, {2, 10}, {0, 1}, + {1, 4}, {0, 1}, {0, 1}, {2, 5}, + {0, 1}, {0, 1}, {0, 1}, {1, 2}, + {2, 6}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {1, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, +}; + +static const int __initconst armada_370_xp_dramclk_ratios[32][2] = { + {0, 1}, {1, 2}, {2, 3}, {2, 3}, + {1, 3}, {1, 2}, {1, 2}, {2, 6}, + {0, 1}, {1, 3}, {2, 5}, {0, 1}, + {1, 4}, {0, 1}, {0, 1}, {2, 5}, + {0, 1}, {0, 1}, {0, 1}, {1, 1}, + {2, 3}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {1, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, +}; + +static void __init armada_370_xp_get_clk_ratio(u32 opt, + void __iomem *sar, int id, int *mult, int *div) +{ + switch (id) { + case A370_XP_NBCLK: + *mult = armada_370_xp_nbclk_ratios[opt][0]; + *div = armada_370_xp_nbclk_ratios[opt][1]; + break; + case A370_XP_HCLK: + *mult = armada_370_xp_hclk_ratios[opt][0]; + *div = armada_370_xp_hclk_ratios[opt][1]; + break; + case A370_XP_DRAMCLK: + *mult = armada_370_xp_dramclk_ratios[opt][0]; + *div = armada_370_xp_dramclk_ratios[opt][1]; + break; + } +} + +static void __init armada_370_get_clk_ratio( + void __iomem *sar, int id, int *mult, int *div) +{ + u32 opt = ((readl(sar) >> SARL_A370_FAB_FREQ_OPT) & + SARL_A370_FAB_FREQ_OPT_MASK); + + armada_370_xp_get_clk_ratio(opt, sar, id, mult, div); +} + + +static const struct core_clocks armada_370_core_clocks = { + .get_tclk_freq = armada_370_get_tclk_freq, + .get_cpu_freq = armada_370_get_cpu_freq, + .get_clk_ratio = armada_370_get_clk_ratio, + .ratios = armada_370_xp_core_ratios, + .num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios), +}; + +static const u32 __initconst armada_xp_cpu_frequencies[] = { + 1000000000, + 1066000000, + 1200000000, + 1333000000, + 1500000000, + 1666000000, + 1800000000, + 2000000000, + 667000000, + 0, + 800000000, + 1600000000, +}; + +/* For Armada XP TCLK frequency is fix: 250MHz */ +static u32 __init armada_xp_get_tclk_freq(void __iomem *sar) +{ + return 250 * 1000 * 1000; +} + +static u32 __init armada_xp_get_cpu_freq(void __iomem *sar) +{ + u32 cpu_freq; + u8 cpu_freq_select = 0; + + cpu_freq_select = ((readl(sar) >> SARL_AXP_PCLK_FREQ_OPT) & + SARL_AXP_PCLK_FREQ_OPT_MASK); + /* + * The upper bit is not contiguous to the other ones and + * located in the high part of the SAR registers + */ + cpu_freq_select |= (((readl(sar+4) >> SARH_AXP_PCLK_FREQ_OPT) & + SARH_AXP_PCLK_FREQ_OPT_MASK) + << SARH_AXP_PCLK_FREQ_OPT_SHIFT); + if (cpu_freq_select > ARRAY_SIZE(armada_xp_cpu_frequencies)) { + pr_err("CPU freq select unsuported: %d\n", cpu_freq_select); + cpu_freq = 0; + } else + cpu_freq = armada_xp_cpu_frequencies[cpu_freq_select]; + + return cpu_freq; +} + +static void __init armada_xp_get_clk_ratio( + void __iomem *sar, int id, int *mult, int *div) +{ + + u32 opt = ((readl(sar) >> SARL_AXP_FAB_FREQ_OPT) & + SARL_AXP_FAB_FREQ_OPT_MASK); + /* + * The upper bit is not contiguous to the other ones and + * located in the high part of the SAR registers + */ + opt |= (((readl(sar+4) >> SARH_AXP_FAB_FREQ_OPT) & + SARH_AXP_FAB_FREQ_OPT_MASK) + << SARH_AXP_FAB_FREQ_OPT_SHIFT); + + armada_370_xp_get_clk_ratio(opt, sar, id, mult, div); +} + +static const struct core_clocks armada_xp_core_clocks = { + .get_tclk_freq = armada_xp_get_tclk_freq, + .get_cpu_freq = armada_xp_get_cpu_freq, + .get_clk_ratio = armada_xp_get_clk_ratio, + .ratios = armada_370_xp_core_ratios, + .num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios), +}; + +#endif /* CONFIG_MACH_ARMADA_370_XP */ + +/* + * Dove PLL sample-at-reset configuration + * + * SAR0[8:5] : CPU frequency + * 5 = 1000 MHz + * 6 = 933 MHz + * 7 = 933 MHz + * 8 = 800 MHz + * 9 = 800 MHz + * 10 = 800 MHz + * 11 = 1067 MHz + * 12 = 667 MHz + * 13 = 533 MHz + * 14 = 400 MHz + * 15 = 333 MHz + * others reserved. + * + * SAR0[11:9] : CPU to L2 Clock divider ratio + * 0 = (1/1) * CPU + * 2 = (1/2) * CPU + * 4 = (1/3) * CPU + * 6 = (1/4) * CPU + * others reserved. + * + * SAR0[15:12] : CPU to DDR DRAM Clock divider ratio + * 0 = (1/1) * CPU + * 2 = (1/2) * CPU + * 3 = (2/5) * CPU + * 4 = (1/3) * CPU + * 6 = (1/4) * CPU + * 8 = (1/5) * CPU + * 10 = (1/6) * CPU + * 12 = (1/7) * CPU + * 14 = (1/8) * CPU + * 15 = (1/10) * CPU + * others reserved. + * + * SAR0[24:23] : TCLK frequency + * 0 = 166 MHz + * 1 = 125 MHz + * others reserved. + */ +#ifdef CONFIG_ARCH_DOVE +#define SAR_DOVE_CPU_FREQ 5 +#define SAR_DOVE_CPU_FREQ_MASK 0xf +#define SAR_DOVE_L2_RATIO 9 +#define SAR_DOVE_L2_RATIO_MASK 0x7 +#define SAR_DOVE_DDR_RATIO 12 +#define SAR_DOVE_DDR_RATIO_MASK 0xf +#define SAR_DOVE_TCLK_FREQ 23 +#define SAR_DOVE_TCLK_FREQ_MASK 0x3 + +static const u32 __initconst dove_tclk_frequencies[] = { + 166666667, + 125000000, + 0, 0 +}; + +static u32 __init dove_get_tclk_freq(void __iomem *sar) +{ + u32 opt = (readl(sar) >> SAR_DOVE_TCLK_FREQ) & + SAR_DOVE_TCLK_FREQ_MASK; + return dove_tclk_frequencies[opt]; +} + +static const u32 __initconst dove_cpu_frequencies[] = { + 0, 0, 0, 0, 0, + 1000000000, + 933333333, 933333333, + 800000000, 800000000, 800000000, + 1066666667, + 666666667, + 533333333, + 400000000, + 333333333 +}; + +static u32 __init dove_get_cpu_freq(void __iomem *sar) +{ + u32 opt = (readl(sar) >> SAR_DOVE_CPU_FREQ) & + SAR_DOVE_CPU_FREQ_MASK; + return dove_cpu_frequencies[opt]; +} + +enum { DOVE_CPU_TO_L2, DOVE_CPU_TO_DDR }; + +static const struct core_ratio __initconst dove_core_ratios[] = { + { .id = DOVE_CPU_TO_L2, .name = "l2clk", }, + { .id = DOVE_CPU_TO_DDR, .name = "ddrclk", } +}; + +static const int __initconst dove_cpu_l2_ratios[8][2] = { + { 1, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 }, + { 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 } +}; + +static const int __initconst dove_cpu_ddr_ratios[16][2] = { + { 1, 1 }, { 0, 1 }, { 1, 2 }, { 2, 5 }, + { 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 }, + { 1, 5 }, { 0, 1 }, { 1, 6 }, { 0, 1 }, + { 1, 7 }, { 0, 1 }, { 1, 8 }, { 1, 10 } +}; + +static void __init dove_get_clk_ratio( + void __iomem *sar, int id, int *mult, int *div) +{ + switch (id) { + case DOVE_CPU_TO_L2: + { + u32 opt = (readl(sar) >> SAR_DOVE_L2_RATIO) & + SAR_DOVE_L2_RATIO_MASK; + *mult = dove_cpu_l2_ratios[opt][0]; + *div = dove_cpu_l2_ratios[opt][1]; + break; + } + case DOVE_CPU_TO_DDR: + { + u32 opt = (readl(sar) >> SAR_DOVE_DDR_RATIO) & + SAR_DOVE_DDR_RATIO_MASK; + *mult = dove_cpu_ddr_ratios[opt][0]; + *div = dove_cpu_ddr_ratios[opt][1]; + break; + } + } +} + +static const struct core_clocks dove_core_clocks = { + .get_tclk_freq = dove_get_tclk_freq, + .get_cpu_freq = dove_get_cpu_freq, + .get_clk_ratio = dove_get_clk_ratio, + .ratios = dove_core_ratios, + .num_ratios = ARRAY_SIZE(dove_core_ratios), +}; +#endif /* CONFIG_ARCH_DOVE */ + +/* + * Kirkwood PLL sample-at-reset configuration + * (6180 has different SAR layout than other Kirkwood SoCs) + * + * SAR0[4:3,22,1] : CPU frequency (6281,6292,6282) + * 4 = 600 MHz + * 6 = 800 MHz + * 7 = 1000 MHz + * 9 = 1200 MHz + * 12 = 1500 MHz + * 13 = 1600 MHz + * 14 = 1800 MHz + * 15 = 2000 MHz + * others reserved. + * + * SAR0[19,10:9] : CPU to L2 Clock divider ratio (6281,6292,6282) + * 1 = (1/2) * CPU + * 3 = (1/3) * CPU + * 5 = (1/4) * CPU + * others reserved. + * + * SAR0[8:5] : CPU to DDR DRAM Clock divider ratio (6281,6292,6282) + * 2 = (1/2) * CPU + * 4 = (1/3) * CPU + * 6 = (1/4) * CPU + * 7 = (2/9) * CPU + * 8 = (1/5) * CPU + * 9 = (1/6) * CPU + * others reserved. + * + * SAR0[4:2] : Kirkwood 6180 cpu/l2/ddr clock configuration (6180 only) + * 5 = [CPU = 600 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/3) * CPU] + * 6 = [CPU = 800 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/4) * CPU] + * 7 = [CPU = 1000 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/5) * CPU] + * others reserved. + * + * SAR0[21] : TCLK frequency + * 0 = 200 MHz + * 1 = 166 MHz + * others reserved. + */ +#ifdef CONFIG_ARCH_KIRKWOOD +#define SAR_KIRKWOOD_CPU_FREQ(x) \ + (((x & (1 << 1)) >> 1) | \ + ((x & (1 << 22)) >> 21) | \ + ((x & (3 << 3)) >> 1)) +#define SAR_KIRKWOOD_L2_RATIO(x) \ + (((x & (3 << 9)) >> 9) | \ + (((x & (1 << 19)) >> 17))) +#define SAR_KIRKWOOD_DDR_RATIO 5 +#define SAR_KIRKWOOD_DDR_RATIO_MASK 0xf +#define SAR_MV88F6180_CLK 2 +#define SAR_MV88F6180_CLK_MASK 0x7 +#define SAR_KIRKWOOD_TCLK_FREQ 21 +#define SAR_KIRKWOOD_TCLK_FREQ_MASK 0x1 + +enum { KIRKWOOD_CPU_TO_L2, KIRKWOOD_CPU_TO_DDR }; + +static const struct core_ratio __initconst kirkwood_core_ratios[] = { + { .id = KIRKWOOD_CPU_TO_L2, .name = "l2clk", }, + { .id = KIRKWOOD_CPU_TO_DDR, .name = "ddrclk", } +}; + +static u32 __init kirkwood_get_tclk_freq(void __iomem *sar) +{ + u32 opt = (readl(sar) >> SAR_KIRKWOOD_TCLK_FREQ) & + SAR_KIRKWOOD_TCLK_FREQ_MASK; + return (opt) ? 166666667 : 200000000; +} + +static const u32 __initconst kirkwood_cpu_frequencies[] = { + 0, 0, 0, 0, + 600000000, + 0, + 800000000, + 1000000000, + 0, + 1200000000, + 0, 0, + 1500000000, + 1600000000, + 1800000000, + 2000000000 +}; + +static u32 __init kirkwood_get_cpu_freq(void __iomem *sar) +{ + u32 opt = SAR_KIRKWOOD_CPU_FREQ(readl(sar)); + return kirkwood_cpu_frequencies[opt]; +} + +static const int __initconst kirkwood_cpu_l2_ratios[8][2] = { + { 0, 1 }, { 1, 2 }, { 0, 1 }, { 1, 3 }, + { 0, 1 }, { 1, 4 }, { 0, 1 }, { 0, 1 } +}; + +static const int __initconst kirkwood_cpu_ddr_ratios[16][2] = { + { 0, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 }, + { 1, 3 }, { 0, 1 }, { 1, 4 }, { 2, 9 }, + { 1, 5 }, { 1, 6 }, { 0, 1 }, { 0, 1 }, + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 } +}; + +static void __init kirkwood_get_clk_ratio( + void __iomem *sar, int id, int *mult, int *div) +{ + switch (id) { + case KIRKWOOD_CPU_TO_L2: + { + u32 opt = SAR_KIRKWOOD_L2_RATIO(readl(sar)); + *mult = kirkwood_cpu_l2_ratios[opt][0]; + *div = kirkwood_cpu_l2_ratios[opt][1]; + break; + } + case KIRKWOOD_CPU_TO_DDR: + { + u32 opt = (readl(sar) >> SAR_KIRKWOOD_DDR_RATIO) & + SAR_KIRKWOOD_DDR_RATIO_MASK; + *mult = kirkwood_cpu_ddr_ratios[opt][0]; + *div = kirkwood_cpu_ddr_ratios[opt][1]; + break; + } + } +} + +static const struct core_clocks kirkwood_core_clocks = { + .get_tclk_freq = kirkwood_get_tclk_freq, + .get_cpu_freq = kirkwood_get_cpu_freq, + .get_clk_ratio = kirkwood_get_clk_ratio, + .ratios = kirkwood_core_ratios, + .num_ratios = ARRAY_SIZE(kirkwood_core_ratios), +}; + +static const u32 __initconst mv88f6180_cpu_frequencies[] = { + 0, 0, 0, 0, 0, + 600000000, + 800000000, + 1000000000 +}; + +static u32 __init mv88f6180_get_cpu_freq(void __iomem *sar) +{ + u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) & SAR_MV88F6180_CLK_MASK; + return mv88f6180_cpu_frequencies[opt]; +} + +static const int __initconst mv88f6180_cpu_ddr_ratios[8][2] = { + { 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }, + { 0, 1 }, { 1, 3 }, { 1, 4 }, { 1, 5 } +}; + +static void __init mv88f6180_get_clk_ratio( + void __iomem *sar, int id, int *mult, int *div) +{ + switch (id) { + case KIRKWOOD_CPU_TO_L2: + { + /* mv88f6180 has a fixed 1:2 CPU-to-L2 ratio */ + *mult = 1; + *div = 2; + break; + } + case KIRKWOOD_CPU_TO_DDR: + { + u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) & + SAR_MV88F6180_CLK_MASK; + *mult = mv88f6180_cpu_ddr_ratios[opt][0]; + *div = mv88f6180_cpu_ddr_ratios[opt][1]; + break; + } + } +} + +static const struct core_clocks mv88f6180_core_clocks = { + .get_tclk_freq = kirkwood_get_tclk_freq, + .get_cpu_freq = mv88f6180_get_cpu_freq, + .get_clk_ratio = mv88f6180_get_clk_ratio, + .ratios = kirkwood_core_ratios, + .num_ratios = ARRAY_SIZE(kirkwood_core_ratios), +}; +#endif /* CONFIG_ARCH_KIRKWOOD */ + +static const __initdata struct of_device_id clk_core_match[] = { +#ifdef CONFIG_MACH_ARMADA_370_XP + { + .compatible = "marvell,armada-370-core-clock", + .data = &armada_370_core_clocks, + }, + { + .compatible = "marvell,armada-xp-core-clock", + .data = &armada_xp_core_clocks, + }, +#endif +#ifdef CONFIG_ARCH_DOVE + { + .compatible = "marvell,dove-core-clock", + .data = &dove_core_clocks, + }, +#endif + +#ifdef CONFIG_ARCH_KIRKWOOD + { + .compatible = "marvell,kirkwood-core-clock", + .data = &kirkwood_core_clocks, + }, + { + .compatible = "marvell,mv88f6180-core-clock", + .data = &mv88f6180_core_clocks, + }, +#endif + + { } +}; + +void __init mvebu_core_clk_init(void) +{ + struct device_node *np; + + for_each_matching_node(np, clk_core_match) { + const struct of_device_id *match = + of_match_node(clk_core_match, np); + mvebu_clk_core_setup(np, (struct core_clocks *)match->data); + } +} diff --git a/drivers/clk/mvebu/clk-core.h b/drivers/clk/mvebu/clk-core.h new file mode 100644 index 000000000000..28b5e02e9885 --- /dev/null +++ b/drivers/clk/mvebu/clk-core.h @@ -0,0 +1,18 @@ +/* + * * Marvell EBU clock core handling defined at reset + * + * Copyright (C) 2012 Marvell + * + * Gregory CLEMENT + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __MVEBU_CLK_CORE_H +#define __MVEBU_CLK_CORE_H + +void __init mvebu_core_clk_init(void); + +#endif diff --git a/drivers/clk/mvebu/clk.c b/drivers/clk/mvebu/clk.c new file mode 100644 index 000000000000..e6742acf9880 --- /dev/null +++ b/drivers/clk/mvebu/clk.c @@ -0,0 +1,23 @@ +/* + * Marvell EBU SoC clock handling. + * + * Copyright (C) 2012 Marvell + * + * Gregory CLEMENT + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#include +#include +#include +#include +#include +#include +#include "clk-core.h" + +void __init mvebu_clocks_init(void) +{ + mvebu_core_clk_init(); +} diff --git a/include/linux/clk/mvebu.h b/include/linux/clk/mvebu.h new file mode 100644 index 000000000000..8c4ae713b063 --- /dev/null +++ b/include/linux/clk/mvebu.h @@ -0,0 +1,22 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __CLK_MVEBU_H_ +#define __CLK_MVEBU_H_ + +void __init mvebu_clocks_init(void); + +#endif -- cgit v1.2.3 From 60d151f38799d5e15845ee04b73cbf3839f1b06c Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 29 Oct 2012 16:54:49 +0100 Subject: dma: mv_xor: allow channels to be registered directly from the main device Extend the XOR engine driver (currently called "mv_xor_shared") so that XOR channels can be passed in the platform_data structure, and be registered from there. This will allow the users of the driver to be converted to the single platform_driver variant of the mv_xor driver. Signed-off-by: Thomas Petazzoni --- drivers/dma/mv_xor.c | 45 ++++++++++++++++++++++++++++++++ drivers/dma/mv_xor.h | 8 +++--- include/linux/platform_data/dma-mv_xor.h | 3 +++ 3 files changed, 53 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index f7b99193e884..6ed3162cccc9 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1292,7 +1292,9 @@ static int mv_xor_shared_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; struct mv_xor_shared_private *msp; + struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data; struct resource *res; + int i, ret; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1334,12 +1336,55 @@ static int mv_xor_shared_probe(struct platform_device *pdev) if (!IS_ERR(msp->clk)) clk_prepare_enable(msp->clk); + if (pdata && pdata->channels) { + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + struct mv_xor_platform_data *cd; + int irq; + + cd = &pdata->channels[i]; + if (!cd) { + ret = -ENODEV; + goto err_channel_add; + } + + irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = irq; + goto err_channel_add; + } + + msp->channels[i] = + mv_xor_channel_add(msp, pdev, cd->hw_id, + cd->cap_mask, + cd->pool_size, irq); + if (IS_ERR(msp->channels[i])) { + ret = PTR_ERR(msp->channels[i]); + goto err_channel_add; + } + } + } + return 0; + +err_channel_add: + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) + if (msp->channels[i]) + mv_xor_channel_remove(msp->channels[i]); + + clk_disable_unprepare(msp->clk); + clk_put(msp->clk); + return ret; } static int mv_xor_shared_remove(struct platform_device *pdev) { struct mv_xor_shared_private *msp = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + if (msp->channels[i]) + mv_xor_channel_remove(msp->channels[i]); + } if (!IS_ERR(msp->clk)) { clk_disable_unprepare(msp->clk); diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index a0641aebbdef..686575f6b9f5 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -26,6 +26,7 @@ #define USE_TIMER #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 +#define MV_XOR_MAX_CHANNELS 2 #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 @@ -53,9 +54,10 @@ #define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2)) struct mv_xor_shared_private { - void __iomem *xor_base; - void __iomem *xor_high_base; - struct clk *clk; + void __iomem *xor_base; + void __iomem *xor_high_base; + struct clk *clk; + struct mv_xor_device *channels[MV_XOR_MAX_CHANNELS]; }; diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 2ba1f7d76eef..f2aed978ca25 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -20,5 +20,8 @@ struct mv_xor_platform_data { size_t pool_size; }; +struct mv_xor_shared_platform_data { + struct mv_xor_platform_data *channels; +}; #endif -- cgit v1.2.3 From 18b2a02c7c8db8bb87a165d26c269968d3dd47bd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 30 Oct 2012 11:54:34 +0100 Subject: dma: mv_xor: remove sub-driver 'mv_xor' Now that XOR channels are directly registered by the main 'mv_xor_shared' device ->probe() function and all users of the 'mv_xor' device have been removed, we can get rid of the latter. Signed-off-by: Thomas Petazzoni --- drivers/dma/mv_xor.c | 49 +------------------------------- include/linux/platform_data/dma-mv_xor.h | 1 - 2 files changed, 1 insertion(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 6ed3162cccc9..be3907bdef14 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1219,35 +1219,6 @@ mv_xor_channel_add(struct mv_xor_shared_private *msp, return ERR_PTR(ret); } -static int __devexit mv_xor_remove(struct platform_device *pdev) -{ - struct mv_xor_device *device = platform_get_drvdata(pdev); - return mv_xor_channel_remove(device); -} - -static int __devinit mv_xor_probe(struct platform_device *pdev) -{ - struct mv_xor_platform_data *plat_data = pdev->dev.platform_data; - struct mv_xor_shared_private *msp = - platform_get_drvdata(plat_data->shared); - struct mv_xor_device *mv_xor_device; - int irq; - - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - mv_xor_device = mv_xor_channel_add(msp, pdev, plat_data->hw_id, - plat_data->cap_mask, - plat_data->pool_size, irq); - if (IS_ERR(mv_xor_device)) - return PTR_ERR(mv_xor_device); - - platform_set_drvdata(pdev, mv_xor_device); - - return 0; -} - static void mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, const struct mbus_dram_target_info *dram) @@ -1279,15 +1250,6 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, writel(win_enable, base + WINDOW_BAR_ENABLE(1)); } -static struct platform_driver mv_xor_driver = { - .probe = mv_xor_probe, - .remove = __devexit_p(mv_xor_remove), - .driver = { - .owner = THIS_MODULE, - .name = MV_XOR_NAME, - }, -}; - static int mv_xor_shared_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; @@ -1406,15 +1368,7 @@ static struct platform_driver mv_xor_shared_driver = { static int __init mv_xor_init(void) { - int rc; - - rc = platform_driver_register(&mv_xor_shared_driver); - if (!rc) { - rc = platform_driver_register(&mv_xor_driver); - if (rc) - platform_driver_unregister(&mv_xor_shared_driver); - } - return rc; + return platform_driver_register(&mv_xor_shared_driver); } module_init(mv_xor_init); @@ -1422,7 +1376,6 @@ module_init(mv_xor_init); #if 0 static void __exit mv_xor_exit(void) { - platform_driver_unregister(&mv_xor_driver); platform_driver_unregister(&mv_xor_shared_driver); return; } diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index f2aed978ca25..6abe5f9326b6 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -11,7 +11,6 @@ #include #define MV_XOR_SHARED_NAME "mv_xor_shared" -#define MV_XOR_NAME "mv_xor" struct mv_xor_platform_data { struct platform_device *shared; -- cgit v1.2.3 From 2ccc469cfecee291707dd50e5842cbf206bc17d7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 31 Oct 2012 13:24:41 +0100 Subject: dma: mv_xor: remove 'shared' from mv_xor_platform_data This member of the platform_data structure is no longer used, so get rid of it. Signed-off-by: Thomas Petazzoni --- arch/arm/plat-orion/common.c | 8 -------- include/linux/platform_data/dma-mv_xor.h | 1 - 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index c6e6666986ff..5a66211d523c 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -625,16 +625,12 @@ static struct resource orion_xor0_shared_resources[] = { }, }; -static struct platform_device orion_xor0_shared; - static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = { { - .shared = &orion_xor0_shared, .hw_id = 0, .pool_size = PAGE_SIZE, }, { - .shared = &orion_xor0_shared, .hw_id = 1, .pool_size = PAGE_SIZE, }, @@ -704,16 +700,12 @@ static struct resource orion_xor1_shared_resources[] = { }, }; -static struct platform_device orion_xor1_shared; - static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = { { - .shared = &orion_xor1_shared, .hw_id = 0, .pool_size = PAGE_SIZE, }, { - .shared = &orion_xor1_shared, .hw_id = 1, .pool_size = PAGE_SIZE, }, diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 6abe5f9326b6..4a0980b14c9b 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -13,7 +13,6 @@ #define MV_XOR_SHARED_NAME "mv_xor_shared" struct mv_xor_platform_data { - struct platform_device *shared; int hw_id; dma_cap_mask_t cap_mask; size_t pool_size; -- cgit v1.2.3 From e39f6ec1f9c1d6a7011adf6d95d8d80bad0586b1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 30 Oct 2012 11:56:26 +0100 Subject: dma: mv_xor: rename mv_xor_platform_data to mv_xor_channel_data mv_xor_platform_data used to be the platform_data structure associated to the 'mv_xor' driver. This driver no longer exists, and this data structure really contains the properties of each XOR channel part of a given XOR engine. Therefore 'struct mv_xor_channel_data' is a more appropriate name. Signed-off-by: Thomas Petazzoni --- arch/arm/plat-orion/common.c | 28 ++++++++++++++-------------- drivers/dma/mv_xor.c | 2 +- include/linux/platform_data/dma-mv_xor.h | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 5a66211d523c..7ffbe77c52cb 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -625,7 +625,7 @@ static struct resource orion_xor0_shared_resources[] = { }, }; -static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = { +static struct mv_xor_channel_data orion_xor0_channels_data[2] = { { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -637,7 +637,7 @@ static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = { }; static struct mv_xor_shared_platform_data orion_xor0_pdata = { - .channels = orion_xor0_channels_pdata, + .channels = orion_xor0_channels_data, }; static struct platform_device orion_xor0_shared = { @@ -671,12 +671,12 @@ void __init orion_xor0_init(unsigned long mapbase_low, * two engines can't do memset simultaneously, this limitation * satisfied by removing memset support from one of the engines. */ - dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[0].cap_mask); - dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[0].cap_mask); + dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask); + dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor0_channels_pdata[1].cap_mask); - dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[1].cap_mask); - dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[1].cap_mask); + dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask); + dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask); + dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask); platform_device_register(&orion_xor0_shared); } @@ -700,7 +700,7 @@ static struct resource orion_xor1_shared_resources[] = { }, }; -static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = { +static struct mv_xor_channel_data orion_xor1_channels_data[2] = { { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -712,7 +712,7 @@ static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = { }; static struct mv_xor_shared_platform_data orion_xor1_pdata = { - .channels = orion_xor1_channels_pdata, + .channels = orion_xor1_channels_data, }; static struct platform_device orion_xor1_shared = { @@ -746,12 +746,12 @@ void __init orion_xor1_init(unsigned long mapbase_low, * two engines can't do memset simultaneously, this limitation * satisfied by removing memset support from one of the engines. */ - dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[0].cap_mask); - dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[0].cap_mask); + dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask); + dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask); - dma_cap_set(DMA_MEMSET, orion_xor1_channels_pdata[1].cap_mask); - dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[1].cap_mask); - dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[1].cap_mask); + dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask); + dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask); + dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask); platform_device_register(&orion_xor1_shared); } diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index be3907bdef14..c7926e417281 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1300,7 +1300,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) if (pdata && pdata->channels) { for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { - struct mv_xor_platform_data *cd; + struct mv_xor_channel_data *cd; int irq; cd = &pdata->channels[i]; diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 4a0980b14c9b..40ea3d5f5b9f 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -12,14 +12,14 @@ #define MV_XOR_SHARED_NAME "mv_xor_shared" -struct mv_xor_platform_data { +struct mv_xor_channel_data { int hw_id; dma_cap_mask_t cap_mask; size_t pool_size; }; struct mv_xor_shared_platform_data { - struct mv_xor_platform_data *channels; + struct mv_xor_channel_data *channels; }; #endif -- cgit v1.2.3 From 7dde453d628687c0e991cfc55c9fd299a804aee6 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 30 Oct 2012 11:58:14 +0100 Subject: dma: mv_xor: rename mv_xor_shared_platform_data to mv_xor_platform_data 'struct mv_xor_shared_platform_data' used to be the platform_data structure for the 'mv_xor_shared', but this driver is going to be renamed simply 'mv_xor', so also rename its platform_data structure accordingly. Signed-off-by: Thomas Petazzoni --- arch/arm/plat-orion/common.c | 4 ++-- drivers/dma/mv_xor.c | 2 +- include/linux/platform_data/dma-mv_xor.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 7ffbe77c52cb..edd57a68fa8c 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -636,7 +636,7 @@ static struct mv_xor_channel_data orion_xor0_channels_data[2] = { }, }; -static struct mv_xor_shared_platform_data orion_xor0_pdata = { +static struct mv_xor_platform_data orion_xor0_pdata = { .channels = orion_xor0_channels_data, }; @@ -711,7 +711,7 @@ static struct mv_xor_channel_data orion_xor1_channels_data[2] = { }, }; -static struct mv_xor_shared_platform_data orion_xor1_pdata = { +static struct mv_xor_platform_data orion_xor1_pdata = { .channels = orion_xor1_channels_data, }; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index c7926e417281..ac598168b21f 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1254,7 +1254,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; struct mv_xor_shared_private *msp; - struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data; + struct mv_xor_platform_data *pdata = pdev->dev.platform_data; struct resource *res; int i, ret; diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 40ea3d5f5b9f..82a5f4b84afe 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -18,7 +18,7 @@ struct mv_xor_channel_data { size_t pool_size; }; -struct mv_xor_shared_platform_data { +struct mv_xor_platform_data { struct mv_xor_channel_data *channels; }; -- cgit v1.2.3 From 0dddee7a7d42192267ebef0fe15be8b296b665c8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 30 Oct 2012 11:59:42 +0100 Subject: dma: mv_xor: change the driver name to 'mv_xor' Since we got rid of the per-XOR channel 'mv_xor' driver, now the per-XOR engine driver that used to be called 'mv_xor_shared' can simply be named 'mv_xor'. Signed-off-by: Thomas Petazzoni --- arch/arm/mach-dove/common.c | 9 +++++---- arch/arm/mach-kirkwood/board-dt.c | 5 +++-- arch/arm/mach-kirkwood/common.c | 4 ++-- arch/arm/plat-orion/common.c | 4 ++-- drivers/dma/mv_xor.c | 2 +- include/linux/platform_data/dma-mv_xor.h | 2 +- 6 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 6a2c4dc413a8..f4ac5b06014b 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -124,8 +125,8 @@ static void __init dove_clk_init(void) orion_clkdev_add(NULL, "mv_crypto", crypto); orion_clkdev_add(NULL, "dove-ac97", ac97); orion_clkdev_add(NULL, "dove-pdma", pdma); - orion_clkdev_add(NULL, "mv_xor_shared.0", xor0); - orion_clkdev_add(NULL, "mv_xor_shared.1", xor1); + orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0); + orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1); } /***************************************************************************** @@ -410,11 +411,11 @@ static void __init dove_legacy_clk_init(void) of_clk_get_from_provider(&clkspec)); clkspec.args[0] = CLOCK_GATING_BIT_XOR0; - orion_clkdev_add(NULL, "mv_xor_shared.0", + orion_clkdev_add(NULL, MV_XOR_NAME ".0", of_clk_get_from_provider(&clkspec)); clkspec.args[0] = CLOCK_GATING_BIT_XOR1; - orion_clkdev_add(NULL, "mv_xor_shared.1", + orion_clkdev_add(NULL, MV_XOR_NAME ".1", of_clk_get_from_provider(&clkspec)); } diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c index 8bdfaa4db091..294ad5a4fd98 100644 --- a/arch/arm/mach-kirkwood/board-dt.c +++ b/arch/arm/mach-kirkwood/board-dt.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "common.h" @@ -60,11 +61,11 @@ static void __init kirkwood_legacy_clk_init(void) of_clk_get_from_provider(&clkspec)); clkspec.args[0] = CGC_BIT_XOR0; - orion_clkdev_add(NULL, "mv_xor_shared.0", + orion_clkdev_add(NULL, MV_XOR_NAME ".0", of_clk_get_from_provider(&clkspec)); clkspec.args[0] = CGC_BIT_XOR1; - orion_clkdev_add(NULL, "mv_xor_shared.1", + orion_clkdev_add(NULL, MV_XOR_NAME ".1", of_clk_get_from_provider(&clkspec)); clkspec.args[0] = CGC_BIT_PEX1; diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 2c6c218fb79e..401dac1a8d80 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -260,8 +260,8 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "orion_nand", runit); orion_clkdev_add(NULL, "mvsdio", sdio); orion_clkdev_add(NULL, "mv_crypto", crypto); - orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0); - orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1); + orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0); + orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1); orion_clkdev_add("0", "pcie", pex0); orion_clkdev_add("1", "pcie", pex1); orion_clkdev_add(NULL, "kirkwood-i2s", audio); diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index edd57a68fa8c..31517cef8c4d 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -641,7 +641,7 @@ static struct mv_xor_platform_data orion_xor0_pdata = { }; static struct platform_device orion_xor0_shared = { - .name = MV_XOR_SHARED_NAME, + .name = MV_XOR_NAME, .id = 0, .num_resources = ARRAY_SIZE(orion_xor0_shared_resources), .resource = orion_xor0_shared_resources, @@ -716,7 +716,7 @@ static struct mv_xor_platform_data orion_xor1_pdata = { }; static struct platform_device orion_xor1_shared = { - .name = MV_XOR_SHARED_NAME, + .name = MV_XOR_NAME, .id = 1, .num_resources = ARRAY_SIZE(orion_xor1_shared_resources), .resource = orion_xor1_shared_resources, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index ac598168b21f..0ed5183eb5a3 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1361,7 +1361,7 @@ static struct platform_driver mv_xor_shared_driver = { .remove = mv_xor_shared_remove, .driver = { .owner = THIS_MODULE, - .name = MV_XOR_SHARED_NAME, + .name = MV_XOR_NAME, }, }; diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 82a5f4b84afe..367bb216c4a7 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -10,7 +10,7 @@ #include #include -#define MV_XOR_SHARED_NAME "mv_xor_shared" +#define MV_XOR_NAME "mv_xor" struct mv_xor_channel_data { int hw_id; -- cgit v1.2.3 From 9aedbdbab39c8aa58c0b2a0791fb10df6eebc123 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 15 Nov 2012 15:36:37 +0100 Subject: dma: mv_xor: remove hw_id field from platform_data There is no need for the platform_data to give this ID, it is simply the channel number, so we can compute it inside the driver when registering the channels. Signed-off-by: Thomas Petazzoni --- arch/arm/plat-orion/common.c | 4 ---- drivers/dma/mv_xor.c | 6 +++--- include/linux/platform_data/dma-mv_xor.h | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 31517cef8c4d..09d836060bf4 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -627,11 +627,9 @@ static struct resource orion_xor0_shared_resources[] = { static struct mv_xor_channel_data orion_xor0_channels_data[2] = { { - .hw_id = 0, .pool_size = PAGE_SIZE, }, { - .hw_id = 1, .pool_size = PAGE_SIZE, }, }; @@ -702,11 +700,9 @@ static struct resource orion_xor1_shared_resources[] = { static struct mv_xor_channel_data orion_xor1_channels_data[2] = { { - .hw_id = 0, .pool_size = PAGE_SIZE, }, { - .hw_id = 1, .pool_size = PAGE_SIZE, }, }; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index a6a5a28574c4..fc983bf38438 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1088,7 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int hw_id, dma_cap_mask_t cap_mask, + int idx, dma_cap_mask_t cap_mask, size_t pool_size, int irq) { int ret = 0; @@ -1101,7 +1101,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, goto err_free_dma; } - mv_chan->idx = hw_id; + mv_chan->idx = idx; dma_dev = &mv_chan->dmadev; @@ -1295,7 +1295,7 @@ static int mv_xor_probe(struct platform_device *pdev) } xordev->channels[i] = - mv_xor_channel_add(xordev, pdev, cd->hw_id, + mv_xor_channel_add(xordev, pdev, i, cd->cap_mask, cd->pool_size, irq); if (IS_ERR(xordev->channels[i])) { diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index 367bb216c4a7..b18dc2496186 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -13,7 +13,6 @@ #define MV_XOR_NAME "mv_xor" struct mv_xor_channel_data { - int hw_id; dma_cap_mask_t cap_mask; size_t pool_size; }; -- cgit v1.2.3 From b503fa01990f6875640339d8f4ba98dbc068f821 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 15 Nov 2012 15:55:30 +0100 Subject: dma: mv_xor: remove the pool_size from platform_data The pool_size is always PAGE_SIZE, and since it is a software configuration paramter (and not a hardware description parameter), we cannot make it part of the Device Tree binding, so we'd better remove it from the platform_data as well. Signed-off-by: Thomas Petazzoni --- arch/arm/plat-orion/common.c | 18 ++---------------- drivers/dma/mv_xor.c | 15 ++++++--------- drivers/dma/mv_xor.h | 1 + include/linux/platform_data/dma-mv_xor.h | 1 - 4 files changed, 9 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 09d836060bf4..2d4b6414609f 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -625,14 +625,7 @@ static struct resource orion_xor0_shared_resources[] = { }, }; -static struct mv_xor_channel_data orion_xor0_channels_data[2] = { - { - .pool_size = PAGE_SIZE, - }, - { - .pool_size = PAGE_SIZE, - }, -}; +static struct mv_xor_channel_data orion_xor0_channels_data[2]; static struct mv_xor_platform_data orion_xor0_pdata = { .channels = orion_xor0_channels_data, @@ -698,14 +691,7 @@ static struct resource orion_xor1_shared_resources[] = { }, }; -static struct mv_xor_channel_data orion_xor1_channels_data[2] = { - { - .pool_size = PAGE_SIZE, - }, - { - .pool_size = PAGE_SIZE, - }, -}; +static struct mv_xor_channel_data orion_xor1_channels_data[2]; static struct mv_xor_platform_data orion_xor1_pdata = { .channels = orion_xor1_channels_data, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fc983bf38438..ec741b4607e2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -603,7 +603,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) int idx; struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); struct mv_xor_desc_slot *slot = NULL; - int num_descs_in_pool = mv_chan->pool_size/MV_XOR_SLOT_SIZE; + int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE; /* Allocate descriptor slots */ idx = mv_chan->slots_allocated; @@ -1074,7 +1074,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) dma_async_device_unregister(&mv_chan->dmadev); - dma_free_coherent(dev, mv_chan->pool_size, + dma_free_coherent(dev, MV_XOR_POOL_SIZE, mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool); list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels, @@ -1088,8 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, - size_t pool_size, int irq) + int idx, dma_cap_mask_t cap_mask, int irq) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -1109,9 +1108,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev, * note: writecombine gives slightly better performance, but * requires that we explicitly flush the writes */ - mv_chan->pool_size = pool_size; mv_chan->dma_desc_pool_virt = - dma_alloc_writecombine(&pdev->dev, mv_chan->pool_size, + dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool, GFP_KERNEL); if (!mv_chan->dma_desc_pool_virt) return ERR_PTR(-ENOMEM); @@ -1193,7 +1191,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, return mv_chan; err_free_dma: - dma_free_coherent(&pdev->dev, pool_size, + dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE, mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool); return ERR_PTR(ret); } @@ -1296,8 +1294,7 @@ static int mv_xor_probe(struct platform_device *pdev) xordev->channels[i] = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, - cd->pool_size, irq); + cd->cap_mask, irq); if (IS_ERR(xordev->channels[i])) { ret = PTR_ERR(xordev->channels[i]); goto err_channel_add; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index dab9f30e564a..698b4487b348 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -24,6 +24,7 @@ #include #define USE_TIMER +#define MV_XOR_POOL_SIZE PAGE_SIZE #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h index b18dc2496186..8ec18f64e396 100644 --- a/include/linux/platform_data/dma-mv_xor.h +++ b/include/linux/platform_data/dma-mv_xor.h @@ -14,7 +14,6 @@ struct mv_xor_channel_data { dma_cap_mask_t cap_mask; - size_t pool_size; }; struct mv_xor_platform_data { -- cgit v1.2.3 From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 11:02:55 -0500 Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments and results Don't store the target request and response sizes in the same variables used to store the server's replies to those targets. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 30 +++++++++++++++++------------- include/linux/nfs_fs_sb.h | 3 +++ 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a32d953b08de..3e572dc316e4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5807,8 +5807,8 @@ void nfs4_destroy_session(struct nfs4_session *session) static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) { struct nfs4_session *session = args->client->cl_session; - unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, - mxresp_sz = session->fc_attrs.max_resp_sz; + unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, + mxresp_sz = session->fc_target_max_resp_sz; if (mxrqst_sz == 0) mxrqst_sz = NFS_MAX_FILE_IO_SIZE; @@ -6015,24 +6015,28 @@ int nfs4_init_session(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs4_session *session; - unsigned int rsize, wsize; + unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; + unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; if (!nfs4_has_session(clp)) return 0; + if (server->rsize != 0) + target_max_resp_sz = server->rsize; + target_max_resp_sz += nfs41_maxread_overhead; + + if (server->wsize != 0) + target_max_rqst_sz = server->wsize; + target_max_rqst_sz += nfs41_maxwrite_overhead; + session = clp->cl_session; spin_lock(&clp->cl_lock); if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - - rsize = server->rsize; - if (rsize == 0) - rsize = NFS_MAX_FILE_IO_SIZE; - wsize = server->wsize; - if (wsize == 0) - wsize = NFS_MAX_FILE_IO_SIZE; - - session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; - session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; + /* Initialise targets and channel attributes */ + session->fc_target_max_rqst_sz = target_max_rqst_sz; + session->fc_attrs.max_rqst_sz = target_max_rqst_sz; + session->fc_target_max_resp_sz = target_max_resp_sz; + session->fc_attrs.max_resp_sz = target_max_resp_sz; } spin_unlock(&clp->cl_lock); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9e76ee1adca..97c8f9191880 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -242,6 +242,9 @@ struct nfs4_session { struct nfs4_channel_attrs bc_attrs; struct nfs4_slot_table bc_slot_table; struct nfs_client *clp; + /* Create session arguments */ + unsigned int fc_target_max_rqst_sz; + unsigned int fc_target_max_resp_sz; }; #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:12:38 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time Store the renewal time inside the session slot instead. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++------ include/linux/nfs_xdr.h | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e5cc5a5065f..14b39742b6e4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -486,6 +486,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { + struct nfs4_slot *slot; unsigned long timestamp; struct nfs_client *clp; @@ -502,12 +503,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * if (!RPC_WAS_SENT(task)) goto out; + slot = res->sr_slot; + /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++res->sr_slot->seq_nr; - timestamp = res->sr_renewal_time; + ++slot->seq_nr; + timestamp = slot->renewal_time; clp = res->sr_session->clp; do_renew_lease(clp, timestamp); /* Check sequence flags */ @@ -521,12 +524,12 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * */ dprintk("%s: slot=%td seq=%d: Operation in progress\n", __func__, - res->sr_slot - res->sr_session->fc_slot_table.slots, - res->sr_slot->seq_nr); + slot - res->sr_session->fc_slot_table.slots, + slot->seq_nr); goto out_retry; default: /* Just update the slot sequence no. */ - ++res->sr_slot->seq_nr; + ++slot->seq_nr; } out: /* The session may be reset by one of the error handlers. */ @@ -637,6 +640,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); slot = tbl->slots + slotid; + slot->renewal_time = jiffies; args->sa_session = session; args->sa_slotid = slotid; @@ -644,7 +648,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, res->sr_session = session; res->sr_slot = slot; - res->sr_renewal_time = jiffies; res->sr_status_flags = 0; /* * sr_status is only set in decode_sequence, and so will remain diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a73ea89789d1..9cb1c63a70c2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -187,6 +187,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { + unsigned long renewal_time; u32 seq_nr; }; @@ -200,7 +201,6 @@ struct nfs4_sequence_res { struct nfs4_session *sr_session; struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ - unsigned long sr_renewal_time; u32 sr_status_flags; }; -- cgit v1.2.3 From 22a8578fca5a47e643bb4f70c232d0ec84db9e4e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Sat, 10 Nov 2012 13:08:20 -0300 Subject: mtd: mtd_blkdevs: Replace request handler kthread with a workqueue By replacing a kthread with a workqueue, the code is now a bit clearer. There's also a slight reduction of code size (numbers apply for x86): Before: text data bss dec hex filename 3248 36 0 3284 cd4 drivers/mtd/mtd_blkdevs.o After: text data bss dec hex filename 3150 36 0 3186 c72 drivers/mtd/mtd_blkdevs.o Due to lack of real hardware, tests have been performed on an emulated environment with mtdswap and mtdblock over nandsim devices. Some real testing should be done, before merging this patch. Signed-off-by: Ezequiel Garcia Signed-off-by: Artem Bityutskiy --- drivers/mtd/mtd_blkdevs.c | 47 ++++++++++++++------------------------------ include/linux/mtd/blktrans.h | 4 +++- 2 files changed, 18 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 26ef2a72bdae..5ad39bb5ab4c 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include "mtdcore.h" @@ -121,16 +120,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev) { - if (kthread_should_stop()) - return 1; - return dev->bg_stop; } EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background); -static int mtd_blktrans_thread(void *arg) +static void mtd_blktrans_work(struct work_struct *work) { - struct mtd_blktrans_dev *dev = arg; + struct mtd_blktrans_dev *dev = + container_of(work, struct mtd_blktrans_dev, work); struct mtd_blktrans_ops *tr = dev->tr; struct request_queue *rq = dev->rq; struct request *req = NULL; @@ -138,7 +135,7 @@ static int mtd_blktrans_thread(void *arg) spin_lock_irq(rq->queue_lock); - while (!kthread_should_stop()) { + while (1) { int res; dev->bg_stop = false; @@ -156,15 +153,7 @@ static int mtd_blktrans_thread(void *arg) background_done = !dev->bg_stop; continue; } - set_current_state(TASK_INTERRUPTIBLE); - - if (kthread_should_stop()) - set_current_state(TASK_RUNNING); - - spin_unlock_irq(rq->queue_lock); - schedule(); - spin_lock_irq(rq->queue_lock); - continue; + break; } spin_unlock_irq(rq->queue_lock); @@ -185,8 +174,6 @@ static int mtd_blktrans_thread(void *arg) __blk_end_request_all(req, -EIO); spin_unlock_irq(rq->queue_lock); - - return 0; } static void mtd_blktrans_request(struct request_queue *rq) @@ -199,10 +186,8 @@ static void mtd_blktrans_request(struct request_queue *rq) if (!dev) while ((req = blk_fetch_request(rq)) != NULL) __blk_end_request_all(req, -ENODEV); - else { - dev->bg_stop = true; - wake_up_process(dev->thread); - } + else + queue_work(dev->wq, &dev->work); } static int blktrans_open(struct block_device *bdev, fmode_t mode) @@ -437,14 +422,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) gd->queue = new->rq; - /* Create processing thread */ - /* TODO: workqueue ? */ - new->thread = kthread_run(mtd_blktrans_thread, new, - "%s%d", tr->name, new->mtd->index); - if (IS_ERR(new->thread)) { - ret = PTR_ERR(new->thread); + /* Create processing workqueue */ + new->wq = alloc_workqueue("%s%d", 0, 0, + tr->name, new->mtd->index); + if (!new->wq) goto error4; - } + INIT_WORK(&new->work, mtd_blktrans_work); + gd->driverfs_dev = &new->mtd->dev; if (new->readonly) @@ -484,9 +468,8 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) /* Stop new requests to arrive */ del_gendisk(old->disk); - - /* Stop the thread */ - kthread_stop(old->thread); + /* Stop workqueue. This will perform any pending request. */ + destroy_workqueue(old->wq); /* Kill current requests */ spin_lock_irqsave(&old->queue_lock, flags); diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index ed270bd2e4df..4eb0a50d0c55 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -23,6 +23,7 @@ #include #include #include +#include struct hd_geometry; struct mtd_info; @@ -43,7 +44,8 @@ struct mtd_blktrans_dev { struct kref ref; struct gendisk *disk; struct attribute_group *disk_attributes; - struct task_struct *thread; + struct workqueue_struct *wq; + struct work_struct work; struct request_queue *rq; spinlock_t queue_lock; void *priv; -- cgit v1.2.3 From edd7eabc85e2f8d76a933b9639bebfe7f98861e4 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 14 Nov 2012 21:09:28 +0530 Subject: mfd: Add TI TPS80031 mfd core driver TPS80031/ TPS80032 Fully Integrated Power Management with Power Path and Battery Charger. The device provides five configurable step-down converters, 11 general purpose LDOs, USB OTG Module, ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with Power Path from USB, 32K clock generator. Add the mfd core driver for TPS80031/TPS80032. Signed-off-by: Laxman Dewangan Reviwed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 14 + drivers/mfd/Makefile | 1 + drivers/mfd/tps80031.c | 573 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps80031.h | 637 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1225 insertions(+) create mode 100644 drivers/mfd/tps80031.c create mode 100644 include/linux/mfd/tps80031.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 59359a7a2493..ca633df7c330 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -265,6 +265,20 @@ config MFD_TPS65912_SPI If you say yes here you get support for the TPS65912 series of PM chips with SPI interface. +config MFD_TPS80031 + bool "TI TPS80031/TPS80032 Power Management chips" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select IRQ_DOMAIN + help + If you say yes here you get support for the Texas Instruments + TPS80031/ TPS80032 Fully Integrated Power Management with Power + Path and Battery Charger. The device provides five configurable + step-down converters, 11 general purpose LDOs, USB OTG Module, + ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with + Power Path from USB, 32K clock generator. + config MENELAUS bool "Texas Instruments TWL92330/Menelaus PM chip" depends on I2C=y && ARCH_OMAP2 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 1c3ee7c76906..8072460e99d2 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -61,6 +61,7 @@ tps65912-objs := tps65912-core.o tps65912-irq.o obj-$(CONFIG_MFD_TPS65912) += tps65912.o obj-$(CONFIG_MFD_TPS65912_I2C) += tps65912-i2c.o obj-$(CONFIG_MFD_TPS65912_SPI) += tps65912-spi.o +obj-$(CONFIG_MFD_TPS80031) += tps80031.o obj-$(CONFIG_MENELAUS) += menelaus.o obj-$(CONFIG_TWL4030_CORE) += twl-core.o twl4030-irq.o twl6030-irq.o diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c new file mode 100644 index 000000000000..f64005efa6fa --- /dev/null +++ b/drivers/mfd/tps80031.c @@ -0,0 +1,573 @@ +/* + * tps80031.c -- TI TPS80031/TPS80032 mfd core driver. + * + * MFD core driver for TI TPS80031/TPS80032 Fully Integrated + * Power Management with Power Path and Battery Charger + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, + * whether express or implied; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct resource tps80031_rtc_resources[] = { + { + .start = TPS80031_INT_RTC_ALARM, + .end = TPS80031_INT_RTC_ALARM, + .flags = IORESOURCE_IRQ, + }, +}; + +/* TPS80031 sub mfd devices */ +static struct mfd_cell tps80031_cell[] = { + { + .name = "tps80031-pmic", + }, + { + .name = "tps80031-clock", + }, + { + .name = "tps80031-rtc", + .num_resources = ARRAY_SIZE(tps80031_rtc_resources), + .resources = tps80031_rtc_resources, + }, + { + .name = "tps80031-gpadc", + }, + { + .name = "tps80031-fuel-gauge", + }, + { + .name = "tps80031-charger", + }, +}; + +static int tps80031_slave_address[TPS80031_NUM_SLAVES] = { + TPS80031_I2C_ID0_ADDR, + TPS80031_I2C_ID1_ADDR, + TPS80031_I2C_ID2_ADDR, + TPS80031_I2C_ID3_ADDR, +}; + +struct tps80031_pupd_data { + u8 reg; + u8 pullup_bit; + u8 pulldown_bit; +}; + +#define TPS80031_IRQ(_reg, _mask) \ + { \ + .reg_offset = (TPS80031_INT_MSK_LINE_##_reg) - \ + TPS80031_INT_MSK_LINE_A, \ + .mask = BIT(_mask), \ + } + +static const struct regmap_irq tps80031_main_irqs[] = { + [TPS80031_INT_PWRON] = TPS80031_IRQ(A, 0), + [TPS80031_INT_RPWRON] = TPS80031_IRQ(A, 1), + [TPS80031_INT_SYS_VLOW] = TPS80031_IRQ(A, 2), + [TPS80031_INT_RTC_ALARM] = TPS80031_IRQ(A, 3), + [TPS80031_INT_RTC_PERIOD] = TPS80031_IRQ(A, 4), + [TPS80031_INT_HOT_DIE] = TPS80031_IRQ(A, 5), + [TPS80031_INT_VXX_SHORT] = TPS80031_IRQ(A, 6), + [TPS80031_INT_SPDURATION] = TPS80031_IRQ(A, 7), + [TPS80031_INT_WATCHDOG] = TPS80031_IRQ(B, 0), + [TPS80031_INT_BAT] = TPS80031_IRQ(B, 1), + [TPS80031_INT_SIM] = TPS80031_IRQ(B, 2), + [TPS80031_INT_MMC] = TPS80031_IRQ(B, 3), + [TPS80031_INT_RES] = TPS80031_IRQ(B, 4), + [TPS80031_INT_GPADC_RT] = TPS80031_IRQ(B, 5), + [TPS80031_INT_GPADC_SW2_EOC] = TPS80031_IRQ(B, 6), + [TPS80031_INT_CC_AUTOCAL] = TPS80031_IRQ(B, 7), + [TPS80031_INT_ID_WKUP] = TPS80031_IRQ(C, 0), + [TPS80031_INT_VBUSS_WKUP] = TPS80031_IRQ(C, 1), + [TPS80031_INT_ID] = TPS80031_IRQ(C, 2), + [TPS80031_INT_VBUS] = TPS80031_IRQ(C, 3), + [TPS80031_INT_CHRG_CTRL] = TPS80031_IRQ(C, 4), + [TPS80031_INT_EXT_CHRG] = TPS80031_IRQ(C, 5), + [TPS80031_INT_INT_CHRG] = TPS80031_IRQ(C, 6), + [TPS80031_INT_RES2] = TPS80031_IRQ(C, 7), +}; + +static struct regmap_irq_chip tps80031_irq_chip = { + .name = "tps80031", + .irqs = tps80031_main_irqs, + .num_irqs = ARRAY_SIZE(tps80031_main_irqs), + .num_regs = 3, + .status_base = TPS80031_INT_STS_A, + .mask_base = TPS80031_INT_MSK_LINE_A, +}; + +#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit) \ + { \ + .reg = TPS80031_CFG_INPUT_PUPD##_reg, \ + .pulldown_bit = _pulldown_bit, \ + .pullup_bit = _pullup_bit, \ + } + +static const struct tps80031_pupd_data tps80031_pupds[] = { + [TPS80031_PREQ1] = PUPD_DATA(1, BIT(0), BIT(1)), + [TPS80031_PREQ2A] = PUPD_DATA(1, BIT(2), BIT(3)), + [TPS80031_PREQ2B] = PUPD_DATA(1, BIT(4), BIT(5)), + [TPS80031_PREQ2C] = PUPD_DATA(1, BIT(6), BIT(7)), + [TPS80031_PREQ3] = PUPD_DATA(2, BIT(0), BIT(1)), + [TPS80031_NRES_WARM] = PUPD_DATA(2, 0, BIT(2)), + [TPS80031_PWM_FORCE] = PUPD_DATA(2, BIT(5), 0), + [TPS80031_CHRG_EXT_CHRG_STATZ] = PUPD_DATA(2, 0, BIT(6)), + [TPS80031_SIM] = PUPD_DATA(3, BIT(0), BIT(1)), + [TPS80031_MMC] = PUPD_DATA(3, BIT(2), BIT(3)), + [TPS80031_GPADC_START] = PUPD_DATA(3, BIT(4), 0), + [TPS80031_DVSI2C_SCL] = PUPD_DATA(4, 0, BIT(0)), + [TPS80031_DVSI2C_SDA] = PUPD_DATA(4, 0, BIT(1)), + [TPS80031_CTLI2C_SCL] = PUPD_DATA(4, 0, BIT(2)), + [TPS80031_CTLI2C_SDA] = PUPD_DATA(4, 0, BIT(3)), +}; +static struct tps80031 *tps80031_power_off_dev; + +int tps80031_ext_power_req_config(struct device *dev, + unsigned long ext_ctrl_flag, int preq_bit, + int state_reg_add, int trans_reg_add) +{ + u8 res_ass_reg = 0; + int preq_mask_bit = 0; + int ret; + + if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ)) + return 0; + + if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) { + res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3); + preq_mask_bit = 5; + } else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) { + res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3); + preq_mask_bit = 6; + } else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) { + res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3); + preq_mask_bit = 7; + } + + /* Configure REQ_ASS registers */ + ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg, + BIT(preq_bit & 0x7)); + if (ret < 0) { + dev_err(dev, "reg 0x%02x setbit failed, err = %d\n", + res_ass_reg, ret); + return ret; + } + + /* Unmask the PREQ */ + ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1, + TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit)); + if (ret < 0) { + dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n", + TPS80031_PHOENIX_MSK_TRANSITION, ret); + return ret; + } + + /* Switch regulator control to resource now */ + if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 | + TPS80031_PWR_REQ_INPUT_PREQ3)) { + ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add, + 0x0, TPS80031_STATE_MASK); + if (ret < 0) + dev_err(dev, "reg 0x%02x update failed, err = %d\n", + state_reg_add, ret); + } else { + ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add, + TPS80031_TRANS_SLEEP_OFF, + TPS80031_TRANS_SLEEP_MASK); + if (ret < 0) + dev_err(dev, "reg 0x%02x update failed, err = %d\n", + trans_reg_add, ret); + } + return ret; +} +EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config); + +static void tps80031_power_off(void) +{ + dev_info(tps80031_power_off_dev->dev, "switching off PMU\n"); + tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1, + TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF); +} + +static void tps80031_pupd_init(struct tps80031 *tps80031, + struct tps80031_platform_data *pdata) +{ + struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data; + int data_size = pdata->pupd_init_data_size; + int i; + + for (i = 0; i < data_size; ++i) { + struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i]; + const struct tps80031_pupd_data *pupd = + &tps80031_pupds[pupd_init->input_pin]; + u8 update_value = 0; + u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit; + + if (pupd_init->setting == TPS80031_PUPD_PULLDOWN) + update_value = pupd->pulldown_bit; + else if (pupd_init->setting == TPS80031_PUPD_PULLUP) + update_value = pupd->pullup_bit; + + tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg, + update_value, update_mask); + } +} + +static int tps80031_init_ext_control(struct tps80031 *tps80031, + struct tps80031_platform_data *pdata) +{ + struct device *dev = tps80031->dev; + int ret; + int i; + + /* Clear all external control for this rail */ + for (i = 0; i < 9; ++i) { + ret = tps80031_write(dev, TPS80031_SLAVE_ID1, + TPS80031_PREQ1_RES_ASS_A + i, 0); + if (ret < 0) { + dev_err(dev, "reg 0x%02x write failed, err = %d\n", + TPS80031_PREQ1_RES_ASS_A + i, ret); + return ret; + } + } + + /* Mask the PREQ */ + ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, + TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5); + if (ret < 0) { + dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n", + TPS80031_PHOENIX_MSK_TRANSITION, ret); + return ret; + } + return ret; +} + +static int __devinit tps80031_irq_init(struct tps80031 *tps80031, int irq, + int irq_base) +{ + struct device *dev = tps80031->dev; + int i, ret; + + /* + * The MASK register used for updating status register when + * interrupt occurs and LINE register used to pass the status + * to actual interrupt line. As per datasheet: + * When INT_MSK_LINE [i] is set to 1, the associated interrupt + * number i is INT line masked, which means that no interrupt is + * generated on the INT line. + * When INT_MSK_LINE [i] is set to 0, the associated interrupt + * number i is line enabled: An interrupt is generated on the + * INT line. + * In any case, the INT_STS [i] status bit may or may not be updated, + * only linked to the INT_MSK_STS [i] configuration register bit. + * + * When INT_MSK_STS [i] is set to 1, the associated interrupt number + * i is status masked, which means that no interrupt is stored in + * the INT_STS[i] status bit. Note that no interrupt number i is + * generated on the INT line, even if the INT_MSK_LINE [i] register + * bit is set to 0. + * When INT_MSK_STS [i] is set to 0, the associated interrupt number i + * is status enabled: An interrupt status is updated in the INT_STS [i] + * register. The interrupt may or may not be generated on the INT line, + * depending on the INT_MSK_LINE [i] configuration register bit. + */ + for (i = 0; i < 3; i++) + tps80031_write(dev, TPS80031_SLAVE_ID2, + TPS80031_INT_MSK_STS_A + i, 0x00); + + ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq, + IRQF_ONESHOT, irq_base, + &tps80031_irq_chip, &tps80031->irq_data); + if (ret < 0) { + dev_err(dev, "add irq failed, err = %d\n", ret); + return ret; + } + return ret; +} + +static bool rd_wr_reg_id0(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE: + return true; + default: + return false; + } +} + +static bool rd_wr_reg_id1(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG: + case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7: + case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG: + case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE: + case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST: + case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE: + case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE: + case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C: + case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING: + case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD: + case TPS80031_BACKUP_REG: + return true; + default: + return false; + } +} + +static bool is_volatile_reg_id1(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE: + case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE: + case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE: + case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C: + case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING: + case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD: + return true; + default: + return false; + } +} + +static bool rd_wr_reg_id2(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION: + case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1: + case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB: + case TPS80031_TOGGLE1 ... TPS80031_VIBMODE: + case TPS80031_PWM1ON ... TPS80031_PWM2OFF: + case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11: + case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C: + case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2: + return true; + default: + return false; + } +} + +static bool rd_wr_reg_id3(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18: + return true; + default: + return false; + } +} + +static const struct regmap_config tps80031_regmap_configs[] = { + { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg_id0, + .readable_reg = rd_wr_reg_id0, + .max_register = TPS80031_MAX_REGISTER, + }, + { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg_id1, + .readable_reg = rd_wr_reg_id1, + .volatile_reg = is_volatile_reg_id1, + .max_register = TPS80031_MAX_REGISTER, + }, + { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg_id2, + .readable_reg = rd_wr_reg_id2, + .max_register = TPS80031_MAX_REGISTER, + }, + { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg_id3, + .readable_reg = rd_wr_reg_id3, + .max_register = TPS80031_MAX_REGISTER, + }, +}; + +static int __devinit tps80031_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tps80031_platform_data *pdata = client->dev.platform_data; + struct tps80031 *tps80031; + int ret; + uint8_t es_version; + uint8_t ep_ver; + int i; + + if (!pdata) { + dev_err(&client->dev, "tps80031 requires platform data\n"); + return -EINVAL; + } + + tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL); + if (!tps80031) { + dev_err(&client->dev, "Malloc failed for tps80031\n"); + return -ENOMEM; + } + + for (i = 0; i < TPS80031_NUM_SLAVES; i++) { + if (tps80031_slave_address[i] == client->addr) + tps80031->clients[i] = client; + else + tps80031->clients[i] = i2c_new_dummy(client->adapter, + tps80031_slave_address[i]); + if (!tps80031->clients[i]) { + dev_err(&client->dev, "can't attach client %d\n", i); + ret = -ENOMEM; + goto fail_client_reg; + } + + i2c_set_clientdata(tps80031->clients[i], tps80031); + tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i], + &tps80031_regmap_configs[i]); + if (IS_ERR(tps80031->regmap[i])) { + ret = PTR_ERR(tps80031->regmap[i]); + dev_err(&client->dev, + "regmap %d init failed, err %d\n", i, ret); + goto fail_client_reg; + } + } + + ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3, + TPS80031_JTAGVERNUM, &es_version); + if (ret < 0) { + dev_err(&client->dev, + "Silicon version number read failed: %d\n", ret); + goto fail_client_reg; + } + + ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3, + TPS80031_EPROM_REV, &ep_ver); + if (ret < 0) { + dev_err(&client->dev, + "Silicon eeprom version read failed: %d\n", ret); + goto fail_client_reg; + } + + dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n", + es_version, ep_ver); + tps80031->es_version = es_version; + tps80031->dev = &client->dev; + i2c_set_clientdata(client, tps80031); + tps80031->chip_info = id->driver_data; + + ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base); + if (ret) { + dev_err(&client->dev, "IRQ init failed: %d\n", ret); + goto fail_client_reg; + } + + tps80031_pupd_init(tps80031, pdata); + + tps80031_init_ext_control(tps80031, pdata); + + ret = mfd_add_devices(tps80031->dev, -1, + tps80031_cell, ARRAY_SIZE(tps80031_cell), + NULL, 0, + regmap_irq_get_domain(tps80031->irq_data)); + if (ret < 0) { + dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret); + goto fail_mfd_add; + } + + if (pdata->use_power_off && !pm_power_off) { + tps80031_power_off_dev = tps80031; + pm_power_off = tps80031_power_off; + } + return 0; + +fail_mfd_add: + regmap_del_irq_chip(client->irq, tps80031->irq_data); + +fail_client_reg: + for (i = 0; i < TPS80031_NUM_SLAVES; i++) { + if (tps80031->clients[i] && (tps80031->clients[i] != client)) + i2c_unregister_device(tps80031->clients[i]); + } + return ret; +} + +static int __devexit tps80031_remove(struct i2c_client *client) +{ + struct tps80031 *tps80031 = i2c_get_clientdata(client); + int i; + + if (tps80031_power_off_dev == tps80031) { + tps80031_power_off_dev = NULL; + pm_power_off = NULL; + } + + mfd_remove_devices(tps80031->dev); + + regmap_del_irq_chip(client->irq, tps80031->irq_data); + + for (i = 0; i < TPS80031_NUM_SLAVES; i++) { + if (tps80031->clients[i] != client) + i2c_unregister_device(tps80031->clients[i]); + } + return 0; +} + +static const struct i2c_device_id tps80031_id_table[] = { + { "tps80031", TPS80031 }, + { "tps80032", TPS80032 }, +}; +MODULE_DEVICE_TABLE(i2c, tps80031_id_table); + +static struct i2c_driver tps80031_driver = { + .driver = { + .name = "tps80031", + .owner = THIS_MODULE, + }, + .probe = tps80031_probe, + .remove = __devexit_p(tps80031_remove), + .id_table = tps80031_id_table, +}; + +static int __init tps80031_init(void) +{ + return i2c_add_driver(&tps80031_driver); +} +subsys_initcall(tps80031_init); + +static void __exit tps80031_exit(void) +{ + i2c_del_driver(&tps80031_driver); +} +module_exit(tps80031_exit); + +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_DESCRIPTION("TPS80031 core driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h new file mode 100644 index 000000000000..2c75c9c9318f --- /dev/null +++ b/include/linux/mfd/tps80031.h @@ -0,0 +1,637 @@ +/* + * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver. + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, + * whether express or implied; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#ifndef __LINUX_MFD_TPS80031_H +#define __LINUX_MFD_TPS80031_H + +#include +#include + +/* Pull-ups/Pull-downs */ +#define TPS80031_CFG_INPUT_PUPD1 0xF0 +#define TPS80031_CFG_INPUT_PUPD2 0xF1 +#define TPS80031_CFG_INPUT_PUPD3 0xF2 +#define TPS80031_CFG_INPUT_PUPD4 0xF3 +#define TPS80031_CFG_LDO_PD1 0xF4 +#define TPS80031_CFG_LDO_PD2 0xF5 +#define TPS80031_CFG_SMPS_PD 0xF6 + +/* Real Time Clock */ +#define TPS80031_SECONDS_REG 0x00 +#define TPS80031_MINUTES_REG 0x01 +#define TPS80031_HOURS_REG 0x02 +#define TPS80031_DAYS_REG 0x03 +#define TPS80031_MONTHS_REG 0x04 +#define TPS80031_YEARS_REG 0x05 +#define TPS80031_WEEKS_REG 0x06 +#define TPS80031_ALARM_SECONDS_REG 0x08 +#define TPS80031_ALARM_MINUTES_REG 0x09 +#define TPS80031_ALARM_HOURS_REG 0x0A +#define TPS80031_ALARM_DAYS_REG 0x0B +#define TPS80031_ALARM_MONTHS_REG 0x0C +#define TPS80031_ALARM_YEARS_REG 0x0D +#define TPS80031_RTC_CTRL_REG 0x10 +#define TPS80031_RTC_STATUS_REG 0x11 +#define TPS80031_RTC_INTERRUPTS_REG 0x12 +#define TPS80031_RTC_COMP_LSB_REG 0x13 +#define TPS80031_RTC_COMP_MSB_REG 0x14 +#define TPS80031_RTC_RESET_STATUS_REG 0x16 + +/*PMC Master Module */ +#define TPS80031_PHOENIX_START_CONDITION 0x1F +#define TPS80031_PHOENIX_MSK_TRANSITION 0x20 +#define TPS80031_STS_HW_CONDITIONS 0x21 +#define TPS80031_PHOENIX_LAST_TURNOFF_STS 0x22 +#define TPS80031_VSYSMIN_LO_THRESHOLD 0x23 +#define TPS80031_VSYSMIN_HI_THRESHOLD 0x24 +#define TPS80031_PHOENIX_DEV_ON 0x25 +#define TPS80031_STS_PWR_GRP_STATE 0x27 +#define TPS80031_PH_CFG_VSYSLOW 0x28 +#define TPS80031_PH_STS_BOOT 0x29 +#define TPS80031_PHOENIX_SENS_TRANSITION 0x2A +#define TPS80031_PHOENIX_SEQ_CFG 0x2B +#define TPS80031_PRIMARY_WATCHDOG_CFG 0X2C +#define TPS80031_KEY_PRESS_DUR_CFG 0X2D +#define TPS80031_SMPS_LDO_SHORT_STS 0x2E + +/* PMC Slave Module - Broadcast */ +#define TPS80031_BROADCAST_ADDR_ALL 0x31 +#define TPS80031_BROADCAST_ADDR_REF 0x32 +#define TPS80031_BROADCAST_ADDR_PROV 0x33 +#define TPS80031_BROADCAST_ADDR_CLK_RST 0x34 + +/* PMC Slave Module SMPS Regulators */ +#define TPS80031_SMPS4_CFG_TRANS 0x41 +#define TPS80031_SMPS4_CFG_STATE 0x42 +#define TPS80031_SMPS4_CFG_VOLTAGE 0x44 +#define TPS80031_VIO_CFG_TRANS 0x47 +#define TPS80031_VIO_CFG_STATE 0x48 +#define TPS80031_VIO_CFG_FORCE 0x49 +#define TPS80031_VIO_CFG_VOLTAGE 0x4A +#define TPS80031_VIO_CFG_STEP 0x48 +#define TPS80031_SMPS1_CFG_TRANS 0x53 +#define TPS80031_SMPS1_CFG_STATE 0x54 +#define TPS80031_SMPS1_CFG_FORCE 0x55 +#define TPS80031_SMPS1_CFG_VOLTAGE 0x56 +#define TPS80031_SMPS1_CFG_STEP 0x57 +#define TPS80031_SMPS2_CFG_TRANS 0x59 +#define TPS80031_SMPS2_CFG_STATE 0x5A +#define TPS80031_SMPS2_CFG_FORCE 0x5B +#define TPS80031_SMPS2_CFG_VOLTAGE 0x5C +#define TPS80031_SMPS2_CFG_STEP 0x5D +#define TPS80031_SMPS3_CFG_TRANS 0x65 +#define TPS80031_SMPS3_CFG_STATE 0x66 +#define TPS80031_SMPS3_CFG_VOLTAGE 0x68 + +/* PMC Slave Module LDO Regulators */ +#define TPS80031_VANA_CFG_TRANS 0x81 +#define TPS80031_VANA_CFG_STATE 0x82 +#define TPS80031_VANA_CFG_VOLTAGE 0x83 +#define TPS80031_LDO2_CFG_TRANS 0x85 +#define TPS80031_LDO2_CFG_STATE 0x86 +#define TPS80031_LDO2_CFG_VOLTAGE 0x87 +#define TPS80031_LDO4_CFG_TRANS 0x89 +#define TPS80031_LDO4_CFG_STATE 0x8A +#define TPS80031_LDO4_CFG_VOLTAGE 0x8B +#define TPS80031_LDO3_CFG_TRANS 0x8D +#define TPS80031_LDO3_CFG_STATE 0x8E +#define TPS80031_LDO3_CFG_VOLTAGE 0x8F +#define TPS80031_LDO6_CFG_TRANS 0x91 +#define TPS80031_LDO6_CFG_STATE 0x92 +#define TPS80031_LDO6_CFG_VOLTAGE 0x93 +#define TPS80031_LDOLN_CFG_TRANS 0x95 +#define TPS80031_LDOLN_CFG_STATE 0x96 +#define TPS80031_LDOLN_CFG_VOLTAGE 0x97 +#define TPS80031_LDO5_CFG_TRANS 0x99 +#define TPS80031_LDO5_CFG_STATE 0x9A +#define TPS80031_LDO5_CFG_VOLTAGE 0x9B +#define TPS80031_LDO1_CFG_TRANS 0x9D +#define TPS80031_LDO1_CFG_STATE 0x9E +#define TPS80031_LDO1_CFG_VOLTAGE 0x9F +#define TPS80031_LDOUSB_CFG_TRANS 0xA1 +#define TPS80031_LDOUSB_CFG_STATE 0xA2 +#define TPS80031_LDOUSB_CFG_VOLTAGE 0xA3 +#define TPS80031_LDO7_CFG_TRANS 0xA5 +#define TPS80031_LDO7_CFG_STATE 0xA6 +#define TPS80031_LDO7_CFG_VOLTAGE 0xA7 + +/* PMC Slave Module External Control */ +#define TPS80031_REGEN1_CFG_TRANS 0xAE +#define TPS80031_REGEN1_CFG_STATE 0xAF +#define TPS80031_REGEN2_CFG_TRANS 0xB1 +#define TPS80031_REGEN2_CFG_STATE 0xB2 +#define TPS80031_SYSEN_CFG_TRANS 0xB4 +#define TPS80031_SYSEN_CFG_STATE 0xB5 + +/* PMC Slave Module Internal Control */ +#define TPS80031_NRESPWRON_CFG_TRANS 0xB7 +#define TPS80031_NRESPWRON_CFG_STATE 0xB8 +#define TPS80031_CLK32KAO_CFG_TRANS 0xBA +#define TPS80031_CLK32KAO_CFG_STATE 0xBB +#define TPS80031_CLK32KG_CFG_TRANS 0xBD +#define TPS80031_CLK32KG_CFG_STATE 0xBE +#define TPS80031_CLK32KAUDIO_CFG_TRANS 0xC0 +#define TPS80031_CLK32KAUDIO_CFG_STATE 0xC1 +#define TPS80031_VRTC_CFG_TRANS 0xC3 +#define TPS80031_VRTC_CFG_STATE 0xC4 +#define TPS80031_BIAS_CFG_TRANS 0xC6 +#define TPS80031_BIAS_CFG_STATE 0xC7 +#define TPS80031_VSYSMIN_HI_CFG_TRANS 0xC9 +#define TPS80031_VSYSMIN_HI_CFG_STATE 0xCA +#define TPS80031_RC6MHZ_CFG_TRANS 0xCC +#define TPS80031_RC6MHZ_CFG_STATE 0xCD +#define TPS80031_TMP_CFG_TRANS 0xCF +#define TPS80031_TMP_CFG_STATE 0xD0 + +/* PMC Slave Module resources assignment */ +#define TPS80031_PREQ1_RES_ASS_A 0xD7 +#define TPS80031_PREQ1_RES_ASS_B 0xD8 +#define TPS80031_PREQ1_RES_ASS_C 0xD9 +#define TPS80031_PREQ2_RES_ASS_A 0xDA +#define TPS80031_PREQ2_RES_ASS_B 0xDB +#define TPS80031_PREQ2_RES_ASS_C 0xDC +#define TPS80031_PREQ3_RES_ASS_A 0xDD +#define TPS80031_PREQ3_RES_ASS_B 0xDE +#define TPS80031_PREQ3_RES_ASS_C 0xDF + +/* PMC Slave Module Miscellaneous */ +#define TPS80031_SMPS_OFFSET 0xE0 +#define TPS80031_SMPS_MULT 0xE3 +#define TPS80031_MISC1 0xE4 +#define TPS80031_MISC2 0xE5 +#define TPS80031_BBSPOR_CFG 0xE6 +#define TPS80031_TMP_CFG 0xE7 + +/* Battery Charging Controller and Indicator LED */ +#define TPS80031_CONTROLLER_CTRL2 0xDA +#define TPS80031_CONTROLLER_VSEL_COMP 0xDB +#define TPS80031_CHARGERUSB_VSYSREG 0xDC +#define TPS80031_CHARGERUSB_VICHRG_PC 0xDD +#define TPS80031_LINEAR_CHRG_STS 0xDE +#define TPS80031_CONTROLLER_INT_MASK 0xE0 +#define TPS80031_CONTROLLER_CTRL1 0xE1 +#define TPS80031_CONTROLLER_WDG 0xE2 +#define TPS80031_CONTROLLER_STAT1 0xE3 +#define TPS80031_CHARGERUSB_INT_STATUS 0xE4 +#define TPS80031_CHARGERUSB_INT_MASK 0xE5 +#define TPS80031_CHARGERUSB_STATUS_INT1 0xE6 +#define TPS80031_CHARGERUSB_STATUS_INT2 0xE7 +#define TPS80031_CHARGERUSB_CTRL1 0xE8 +#define TPS80031_CHARGERUSB_CTRL2 0xE9 +#define TPS80031_CHARGERUSB_CTRL3 0xEA +#define TPS80031_CHARGERUSB_STAT1 0xEB +#define TPS80031_CHARGERUSB_VOREG 0xEC +#define TPS80031_CHARGERUSB_VICHRG 0xED +#define TPS80031_CHARGERUSB_CINLIMIT 0xEE +#define TPS80031_CHARGERUSB_CTRLLIMIT1 0xEF +#define TPS80031_CHARGERUSB_CTRLLIMIT2 0xF0 +#define TPS80031_LED_PWM_CTRL1 0xF4 +#define TPS80031_LED_PWM_CTRL2 0xF5 + +/* USB On-The-Go */ +#define TPS80031_BACKUP_REG 0xFA +#define TPS80031_USB_VENDOR_ID_LSB 0x00 +#define TPS80031_USB_VENDOR_ID_MSB 0x01 +#define TPS80031_USB_PRODUCT_ID_LSB 0x02 +#define TPS80031_USB_PRODUCT_ID_MSB 0x03 +#define TPS80031_USB_VBUS_CTRL_SET 0x04 +#define TPS80031_USB_VBUS_CTRL_CLR 0x05 +#define TPS80031_USB_ID_CTRL_SET 0x06 +#define TPS80031_USB_ID_CTRL_CLR 0x07 +#define TPS80031_USB_VBUS_INT_SRC 0x08 +#define TPS80031_USB_VBUS_INT_LATCH_SET 0x09 +#define TPS80031_USB_VBUS_INT_LATCH_CLR 0x0A +#define TPS80031_USB_VBUS_INT_EN_LO_SET 0x0B +#define TPS80031_USB_VBUS_INT_EN_LO_CLR 0x0C +#define TPS80031_USB_VBUS_INT_EN_HI_SET 0x0D +#define TPS80031_USB_VBUS_INT_EN_HI_CLR 0x0E +#define TPS80031_USB_ID_INT_SRC 0x0F +#define TPS80031_USB_ID_INT_LATCH_SET 0x10 +#define TPS80031_USB_ID_INT_LATCH_CLR 0x11 +#define TPS80031_USB_ID_INT_EN_LO_SET 0x12 +#define TPS80031_USB_ID_INT_EN_LO_CLR 0x13 +#define TPS80031_USB_ID_INT_EN_HI_SET 0x14 +#define TPS80031_USB_ID_INT_EN_HI_CLR 0x15 +#define TPS80031_USB_OTG_ADP_CTRL 0x16 +#define TPS80031_USB_OTG_ADP_HIGH 0x17 +#define TPS80031_USB_OTG_ADP_LOW 0x18 +#define TPS80031_USB_OTG_ADP_RISE 0x19 +#define TPS80031_USB_OTG_REVISION 0x1A + +/* Gas Gauge */ +#define TPS80031_FG_REG_00 0xC0 +#define TPS80031_FG_REG_01 0xC1 +#define TPS80031_FG_REG_02 0xC2 +#define TPS80031_FG_REG_03 0xC3 +#define TPS80031_FG_REG_04 0xC4 +#define TPS80031_FG_REG_05 0xC5 +#define TPS80031_FG_REG_06 0xC6 +#define TPS80031_FG_REG_07 0xC7 +#define TPS80031_FG_REG_08 0xC8 +#define TPS80031_FG_REG_09 0xC9 +#define TPS80031_FG_REG_10 0xCA +#define TPS80031_FG_REG_11 0xCB + +/* General Purpose ADC */ +#define TPS80031_GPADC_CTRL 0x2E +#define TPS80031_GPADC_CTRL2 0x2F +#define TPS80031_RTSELECT_LSB 0x32 +#define TPS80031_RTSELECT_ISB 0x33 +#define TPS80031_RTSELECT_MSB 0x34 +#define TPS80031_GPSELECT_ISB 0x35 +#define TPS80031_CTRL_P1 0x36 +#define TPS80031_RTCH0_LSB 0x37 +#define TPS80031_RTCH0_MSB 0x38 +#define TPS80031_RTCH1_LSB 0x39 +#define TPS80031_RTCH1_MSB 0x3A +#define TPS80031_GPCH0_LSB 0x3B +#define TPS80031_GPCH0_MSB 0x3C + +/* SIM, MMC and Battery Detection */ +#define TPS80031_SIMDEBOUNCING 0xEB +#define TPS80031_SIMCTRL 0xEC +#define TPS80031_MMCDEBOUNCING 0xED +#define TPS80031_MMCCTRL 0xEE +#define TPS80031_BATDEBOUNCING 0xEF + +/* Vibrator Driver and PWMs */ +#define TPS80031_VIBCTRL 0x9B +#define TPS80031_VIBMODE 0x9C +#define TPS80031_PWM1ON 0xBA +#define TPS80031_PWM1OFF 0xBB +#define TPS80031_PWM2ON 0xBD +#define TPS80031_PWM2OFF 0xBE + +/* Control Interface */ +#define TPS80031_INT_STS_A 0xD0 +#define TPS80031_INT_STS_B 0xD1 +#define TPS80031_INT_STS_C 0xD2 +#define TPS80031_INT_MSK_LINE_A 0xD3 +#define TPS80031_INT_MSK_LINE_B 0xD4 +#define TPS80031_INT_MSK_LINE_C 0xD5 +#define TPS80031_INT_MSK_STS_A 0xD6 +#define TPS80031_INT_MSK_STS_B 0xD7 +#define TPS80031_INT_MSK_STS_C 0xD8 +#define TPS80031_TOGGLE1 0x90 +#define TPS80031_TOGGLE2 0x91 +#define TPS80031_TOGGLE3 0x92 +#define TPS80031_PWDNSTATUS1 0x93 +#define TPS80031_PWDNSTATUS2 0x94 +#define TPS80031_VALIDITY0 0x17 +#define TPS80031_VALIDITY1 0x18 +#define TPS80031_VALIDITY2 0x19 +#define TPS80031_VALIDITY3 0x1A +#define TPS80031_VALIDITY4 0x1B +#define TPS80031_VALIDITY5 0x1C +#define TPS80031_VALIDITY6 0x1D +#define TPS80031_VALIDITY7 0x1E + +/* Version number related register */ +#define TPS80031_JTAGVERNUM 0x87 +#define TPS80031_EPROM_REV 0xDF + +/* GPADC Trimming Bits. */ +#define TPS80031_GPADC_TRIM0 0xCC +#define TPS80031_GPADC_TRIM1 0xCD +#define TPS80031_GPADC_TRIM2 0xCE +#define TPS80031_GPADC_TRIM3 0xCF +#define TPS80031_GPADC_TRIM4 0xD0 +#define TPS80031_GPADC_TRIM5 0xD1 +#define TPS80031_GPADC_TRIM6 0xD2 +#define TPS80031_GPADC_TRIM7 0xD3 +#define TPS80031_GPADC_TRIM8 0xD4 +#define TPS80031_GPADC_TRIM9 0xD5 +#define TPS80031_GPADC_TRIM10 0xD6 +#define TPS80031_GPADC_TRIM11 0xD7 +#define TPS80031_GPADC_TRIM12 0xD8 +#define TPS80031_GPADC_TRIM13 0xD9 +#define TPS80031_GPADC_TRIM14 0xDA +#define TPS80031_GPADC_TRIM15 0xDB +#define TPS80031_GPADC_TRIM16 0xDC +#define TPS80031_GPADC_TRIM17 0xDD +#define TPS80031_GPADC_TRIM18 0xDE + +/* TPS80031_CONTROLLER_STAT1 bit fields */ +#define TPS80031_CONTROLLER_STAT1_BAT_TEMP 0 +#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED 1 +#define TPS80031_CONTROLLER_STAT1_VBUS_DET 2 +#define TPS80031_CONTROLLER_STAT1_VAC_DET 3 +#define TPS80031_CONTROLLER_STAT1_FAULT_WDG 4 +#define TPS80031_CONTROLLER_STAT1_LINCH_GATED 6 +/* TPS80031_CONTROLLER_INT_MASK bit filed */ +#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET 0 +#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET 1 +#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP 2 +#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG 3 +#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED 4 +#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED 5 + +#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK 0x3F + +/* TPS80031_PHOENIX_DEV_ON bit field */ +#define TPS80031_DEVOFF 0x1 + +#define TPS80031_EXT_CONTROL_CFG_TRANS 0 +#define TPS80031_EXT_CONTROL_CFG_STATE 1 + +/* State register field */ +#define TPS80031_STATE_OFF 0x00 +#define TPS80031_STATE_ON 0x01 +#define TPS80031_STATE_MASK 0x03 + +/* Trans register field */ +#define TPS80031_TRANS_ACTIVE_OFF 0x00 +#define TPS80031_TRANS_ACTIVE_ON 0x01 +#define TPS80031_TRANS_ACTIVE_MASK 0x03 +#define TPS80031_TRANS_SLEEP_OFF 0x00 +#define TPS80031_TRANS_SLEEP_ON 0x04 +#define TPS80031_TRANS_SLEEP_MASK 0x0C +#define TPS80031_TRANS_OFF_OFF 0x00 +#define TPS80031_TRANS_OFF_ACTIVE 0x10 +#define TPS80031_TRANS_OFF_MASK 0x30 + +#define TPS80031_EXT_PWR_REQ (TPS80031_PWR_REQ_INPUT_PREQ1 | \ + TPS80031_PWR_REQ_INPUT_PREQ2 | \ + TPS80031_PWR_REQ_INPUT_PREQ3) + +/* TPS80031_BBSPOR_CFG bit field */ +#define TPS80031_BBSPOR_CHG_EN 0x8 +#define TPS80031_MAX_REGISTER 0xFF + +struct i2c_client; + +/* Supported chips */ +enum chips { + TPS80031 = 0x00000001, + TPS80032 = 0x00000002, +}; + +enum { + TPS80031_INT_PWRON, + TPS80031_INT_RPWRON, + TPS80031_INT_SYS_VLOW, + TPS80031_INT_RTC_ALARM, + TPS80031_INT_RTC_PERIOD, + TPS80031_INT_HOT_DIE, + TPS80031_INT_VXX_SHORT, + TPS80031_INT_SPDURATION, + TPS80031_INT_WATCHDOG, + TPS80031_INT_BAT, + TPS80031_INT_SIM, + TPS80031_INT_MMC, + TPS80031_INT_RES, + TPS80031_INT_GPADC_RT, + TPS80031_INT_GPADC_SW2_EOC, + TPS80031_INT_CC_AUTOCAL, + TPS80031_INT_ID_WKUP, + TPS80031_INT_VBUSS_WKUP, + TPS80031_INT_ID, + TPS80031_INT_VBUS, + TPS80031_INT_CHRG_CTRL, + TPS80031_INT_EXT_CHRG, + TPS80031_INT_INT_CHRG, + TPS80031_INT_RES2, + TPS80031_INT_BAT_TEMP_OVRANGE, + TPS80031_INT_BAT_REMOVED, + TPS80031_INT_VBUS_DET, + TPS80031_INT_VAC_DET, + TPS80031_INT_FAULT_WDG, + TPS80031_INT_LINCH_GATED, + + /* Last interrupt id to get the end number */ + TPS80031_INT_NR, +}; + +/* TPS80031 Slave IDs */ +#define TPS80031_NUM_SLAVES 4 +#define TPS80031_SLAVE_ID0 0 +#define TPS80031_SLAVE_ID1 1 +#define TPS80031_SLAVE_ID2 2 +#define TPS80031_SLAVE_ID3 3 + +/* TPS80031 I2C addresses */ +#define TPS80031_I2C_ID0_ADDR 0x12 +#define TPS80031_I2C_ID1_ADDR 0x48 +#define TPS80031_I2C_ID2_ADDR 0x49 +#define TPS80031_I2C_ID3_ADDR 0x4A + +enum { + TPS80031_REGULATOR_VIO, + TPS80031_REGULATOR_SMPS1, + TPS80031_REGULATOR_SMPS2, + TPS80031_REGULATOR_SMPS3, + TPS80031_REGULATOR_SMPS4, + TPS80031_REGULATOR_VANA, + TPS80031_REGULATOR_LDO1, + TPS80031_REGULATOR_LDO2, + TPS80031_REGULATOR_LDO3, + TPS80031_REGULATOR_LDO4, + TPS80031_REGULATOR_LDO5, + TPS80031_REGULATOR_LDO6, + TPS80031_REGULATOR_LDO7, + TPS80031_REGULATOR_LDOLN, + TPS80031_REGULATOR_LDOUSB, + TPS80031_REGULATOR_VBUS, + TPS80031_REGULATOR_REGEN1, + TPS80031_REGULATOR_REGEN2, + TPS80031_REGULATOR_SYSEN, + TPS80031_REGULATOR_MAX, +}; + +/* Different configurations for the rails */ +enum { + /* USBLDO input selection */ + TPS80031_USBLDO_INPUT_VSYS = 0x00000001, + TPS80031_USBLDO_INPUT_PMID = 0x00000002, + + /* LDO3 output mode */ + TPS80031_LDO3_OUTPUT_VIB = 0x00000004, + + /* VBUS configuration */ + TPS80031_VBUS_DISCHRG_EN_PDN = 0x00000004, + TPS80031_VBUS_SW_ONLY = 0x00000008, + TPS80031_VBUS_SW_N_ID = 0x00000010, +}; + +/* External controls requests */ +enum tps80031_ext_control { + TPS80031_PWR_REQ_INPUT_NONE = 0x00000000, + TPS80031_PWR_REQ_INPUT_PREQ1 = 0x00000001, + TPS80031_PWR_REQ_INPUT_PREQ2 = 0x00000002, + TPS80031_PWR_REQ_INPUT_PREQ3 = 0x00000004, + TPS80031_PWR_OFF_ON_SLEEP = 0x00000008, + TPS80031_PWR_ON_ON_SLEEP = 0x00000010, +}; + +enum tps80031_pupd_pins { + TPS80031_PREQ1 = 0, + TPS80031_PREQ2A, + TPS80031_PREQ2B, + TPS80031_PREQ2C, + TPS80031_PREQ3, + TPS80031_NRES_WARM, + TPS80031_PWM_FORCE, + TPS80031_CHRG_EXT_CHRG_STATZ, + TPS80031_SIM, + TPS80031_MMC, + TPS80031_GPADC_START, + TPS80031_DVSI2C_SCL, + TPS80031_DVSI2C_SDA, + TPS80031_CTLI2C_SCL, + TPS80031_CTLI2C_SDA, +}; + +enum tps80031_pupd_settings { + TPS80031_PUPD_NORMAL, + TPS80031_PUPD_PULLDOWN, + TPS80031_PUPD_PULLUP, +}; + +struct tps80031 { + struct device *dev; + unsigned long chip_info; + int es_version; + struct i2c_client *clients[TPS80031_NUM_SLAVES]; + struct regmap *regmap[TPS80031_NUM_SLAVES]; + struct regmap_irq_chip_data *irq_data; +}; + +struct tps80031_pupd_init_data { + int input_pin; + int setting; +}; + +/* + * struct tps80031_regulator_platform_data - tps80031 regulator platform data. + * + * @reg_init_data: The regulator init data. + * @ext_ctrl_flag: External control flag for sleep/power request control. + * @config_flags: Configuration flag to configure the rails. + * It should be ORed of config enums. + */ + +struct tps80031_regulator_platform_data { + struct regulator_init_data *reg_init_data; + unsigned int ext_ctrl_flag; + unsigned int config_flags; +}; + +struct tps80031_platform_data { + int irq_base; + bool use_power_off; + struct tps80031_pupd_init_data *pupd_init_data; + int pupd_init_data_size; + struct tps80031_regulator_platform_data + *regulator_pdata[TPS80031_REGULATOR_MAX]; +}; + +static inline int tps80031_write(struct device *dev, int sid, + int reg, uint8_t val) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_write(tps80031->regmap[sid], reg, val); +} + +static inline int tps80031_writes(struct device *dev, int sid, int reg, + int len, uint8_t *val) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_bulk_write(tps80031->regmap[sid], reg, val, len); +} + +static inline int tps80031_read(struct device *dev, int sid, + int reg, uint8_t *val) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + unsigned int ival; + int ret; + + ret = regmap_read(tps80031->regmap[sid], reg, &ival); + if (ret < 0) { + dev_err(dev, "failed reading from reg 0x%02x\n", reg); + return ret; + } + + *val = ival; + return ret; +} + +static inline int tps80031_reads(struct device *dev, int sid, + int reg, int len, uint8_t *val) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_bulk_read(tps80031->regmap[sid], reg, val, len); +} + +static inline int tps80031_set_bits(struct device *dev, int sid, + int reg, uint8_t bit_mask) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_update_bits(tps80031->regmap[sid], reg, + bit_mask, bit_mask); +} + +static inline int tps80031_clr_bits(struct device *dev, int sid, + int reg, uint8_t bit_mask) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0); +} + +static inline int tps80031_update(struct device *dev, int sid, + int reg, uint8_t val, uint8_t mask) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_update_bits(tps80031->regmap[sid], reg, mask, val); +} + +static inline unsigned long tps80031_get_chip_info(struct device *dev) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return tps80031->chip_info; +} + +static inline int tps80031_get_pmu_version(struct device *dev) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return tps80031->es_version; +} + +static inline int tps80031_irq_get_virq(struct device *dev, int irq) +{ + struct tps80031 *tps80031 = dev_get_drvdata(dev); + + return regmap_irq_get_virq(tps80031->irq_data, irq); +} + +extern int tps80031_ext_power_req_config(struct device *dev, + unsigned long ext_ctrl_flag, int preq_bit, + int state_reg_add, int trans_reg_add); +#endif /*__LINUX_MFD_TPS80031_H */ -- cgit v1.2.3 From 3863db3e800c64e21e4effcc3de0f72cdb9b0d77 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 20 Nov 2012 08:44:47 +0530 Subject: mfd: tps65090: Remove unused member of struct tps65090 Remove unused member from tps65090 data structure as these are not used. Signed-off-by: Laxman Dewangan Signed-off-by: Samuel Ortiz --- drivers/mfd/tps65090.c | 6 +----- include/linux/mfd/tps65090.h | 11 ----------- 2 files changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index f95f7f6b846c..3cfc9dcbe9dc 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -269,12 +269,9 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client, return -ENOMEM; } - tps65090->client = client; tps65090->dev = &client->dev; i2c_set_clientdata(client, tps65090); - mutex_init(&tps65090->lock); - if (client->irq) { ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base); if (ret) { @@ -284,8 +281,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client, } } - tps65090->rmap = devm_regmap_init_i2c(tps65090->client, - &tps65090_regmap_config); + tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config); if (IS_ERR(tps65090->rmap)) { ret = PTR_ERR(tps65090->rmap); dev_err(&client->dev, "regmap_init failed with err: %d\n", ret); diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 6bc31d854626..6c576224f637 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -25,26 +25,15 @@ #include struct tps65090 { - struct mutex lock; struct device *dev; - struct i2c_client *client; struct regmap *rmap; struct irq_chip irq_chip; struct mutex irq_lock; int irq_base; - unsigned int id; -}; - -struct tps65090_subdev_info { - int id; - const char *name; - void *platform_data; }; struct tps65090_platform_data { int irq_base; - int num_subdevs; - struct tps65090_subdev_info *subdevs; }; /* -- cgit v1.2.3 From b9c79323166530a14c1fa8c10337eeaa54e3f98d Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 20 Nov 2012 08:44:48 +0530 Subject: mfd: tps65090: Move register access APIs to header Since tps65090 register is accessed via regmap, moving the register access APIs to header and making it as inline. Signed-off-by: Laxman Dewangan Signed-off-by: Samuel Ortiz --- drivers/mfd/tps65090.c | 34 ---------------------------------- include/linux/mfd/tps65090.h | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index 3cfc9dcbe9dc..355a07749454 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #define NUM_INT_REG 2 @@ -78,39 +77,6 @@ static struct mfd_cell tps65090s[] = { }, }; -int tps65090_write(struct device *dev, int reg, uint8_t val) -{ - struct tps65090 *tps = dev_get_drvdata(dev); - return regmap_write(tps->rmap, reg, val); -} -EXPORT_SYMBOL_GPL(tps65090_write); - -int tps65090_read(struct device *dev, int reg, uint8_t *val) -{ - struct tps65090 *tps = dev_get_drvdata(dev); - unsigned int temp_val; - int ret; - ret = regmap_read(tps->rmap, reg, &temp_val); - if (!ret) - *val = temp_val; - return ret; -} -EXPORT_SYMBOL_GPL(tps65090_read); - -int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num) -{ - struct tps65090 *tps = dev_get_drvdata(dev); - return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u); -} -EXPORT_SYMBOL_GPL(tps65090_set_bits); - -int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num) -{ - struct tps65090 *tps = dev_get_drvdata(dev); - return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u); -} -EXPORT_SYMBOL_GPL(tps65090_clr_bits); - static void tps65090_irq_lock(struct irq_data *data) { struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data); diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 6c576224f637..1a5f916b7383 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -23,6 +23,7 @@ #define __LINUX_MFD_TPS65090_H #include +#include struct tps65090 { struct device *dev; @@ -40,9 +41,39 @@ struct tps65090_platform_data { * NOTE: the functions below are not intended for use outside * of the TPS65090 sub-device drivers */ -extern int tps65090_write(struct device *dev, int reg, uint8_t val); -extern int tps65090_read(struct device *dev, int reg, uint8_t *val); -extern int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num); -extern int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num); +static inline int tps65090_write(struct device *dev, int reg, uint8_t val) +{ + struct tps65090 *tps = dev_get_drvdata(dev); + + return regmap_write(tps->rmap, reg, val); +} + +static inline int tps65090_read(struct device *dev, int reg, uint8_t *val) +{ + struct tps65090 *tps = dev_get_drvdata(dev); + unsigned int temp_val; + int ret; + + ret = regmap_read(tps->rmap, reg, &temp_val); + if (!ret) + *val = temp_val; + return ret; +} + +static inline int tps65090_set_bits(struct device *dev, int reg, + uint8_t bit_num) +{ + struct tps65090 *tps = dev_get_drvdata(dev); + + return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u); +} + +static inline int tps65090_clr_bits(struct device *dev, int reg, + uint8_t bit_num) +{ + struct tps65090 *tps = dev_get_drvdata(dev); + + return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u); +} #endif /*__LINUX_MFD_TPS65090_H */ -- cgit v1.2.3 From 759f2598ef3876637e40d99a4ceb7a3d83a4d8d3 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 20 Nov 2012 08:44:49 +0530 Subject: mfd: tps65090: Use regmap irq framework for interrupt support Use the regmap irq framework for implementing TPS65090 interrupt support in place of implementing it locally. Signed-off-by: Laxman Dewangan Signed-off-by: Samuel Ortiz --- drivers/mfd/tps65090.c | 263 ++++++++++++++++--------------------------- include/linux/mfd/tps65090.h | 23 +++- 2 files changed, 118 insertions(+), 168 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index 355a07749454..2eaae52cb5b8 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -38,35 +38,21 @@ #define TPS65090_INT_MSK 0x2 #define TPS65090_INT_MSK2 0x3 -struct tps65090_irq_data { - u8 mask_reg; - u8 mask_pos; -}; - -#define TPS65090_IRQ(_reg, _mask_pos) \ - { \ - .mask_reg = (_reg), \ - .mask_pos = (_mask_pos), \ - } - -static const struct tps65090_irq_data tps65090_irqs[] = { - [0] = TPS65090_IRQ(0, 0), - [1] = TPS65090_IRQ(0, 1), - [2] = TPS65090_IRQ(0, 2), - [3] = TPS65090_IRQ(0, 3), - [4] = TPS65090_IRQ(0, 4), - [5] = TPS65090_IRQ(0, 5), - [6] = TPS65090_IRQ(0, 6), - [7] = TPS65090_IRQ(0, 7), - [8] = TPS65090_IRQ(1, 0), - [9] = TPS65090_IRQ(1, 1), - [10] = TPS65090_IRQ(1, 2), - [11] = TPS65090_IRQ(1, 3), - [12] = TPS65090_IRQ(1, 4), - [13] = TPS65090_IRQ(1, 5), - [14] = TPS65090_IRQ(1, 6), - [15] = TPS65090_IRQ(1, 7), -}; +#define TPS65090_INT1_MASK_VAC_STATUS_CHANGE 1 +#define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE 2 +#define TPS65090_INT1_MASK_BAT_STATUS_CHANGE 3 +#define TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE 4 +#define TPS65090_INT1_MASK_CHARGING_COMPLETE 5 +#define TPS65090_INT1_MASK_OVERLOAD_DCDC1 6 +#define TPS65090_INT1_MASK_OVERLOAD_DCDC2 7 +#define TPS65090_INT2_MASK_OVERLOAD_DCDC3 0 +#define TPS65090_INT2_MASK_OVERLOAD_FET1 1 +#define TPS65090_INT2_MASK_OVERLOAD_FET2 2 +#define TPS65090_INT2_MASK_OVERLOAD_FET3 3 +#define TPS65090_INT2_MASK_OVERLOAD_FET4 4 +#define TPS65090_INT2_MASK_OVERLOAD_FET5 5 +#define TPS65090_INT2_MASK_OVERLOAD_FET6 6 +#define TPS65090_INT2_MASK_OVERLOAD_FET7 7 static struct mfd_cell tps65090s[] = { { @@ -77,132 +63,77 @@ static struct mfd_cell tps65090s[] = { }, }; -static void tps65090_irq_lock(struct irq_data *data) -{ - struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data); - - mutex_lock(&tps65090->irq_lock); -} - -static void tps65090_irq_mask(struct irq_data *irq_data) -{ - struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data); - unsigned int __irq = irq_data->hwirq; - const struct tps65090_irq_data *data = &tps65090_irqs[__irq]; - - tps65090_set_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg), - data->mask_pos); -} - -static void tps65090_irq_unmask(struct irq_data *irq_data) -{ - struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data); - unsigned int __irq = irq_data->irq - tps65090->irq_base; - const struct tps65090_irq_data *data = &tps65090_irqs[__irq]; - - tps65090_clr_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg), - data->mask_pos); -} - -static void tps65090_irq_sync_unlock(struct irq_data *data) -{ - struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data); - - mutex_unlock(&tps65090->irq_lock); -} - -static irqreturn_t tps65090_irq(int irq, void *data) -{ - struct tps65090 *tps65090 = data; - int ret = 0; - u8 status, mask; - unsigned long int acks = 0; - int i; - - for (i = 0; i < NUM_INT_REG; i++) { - ret = tps65090_read(tps65090->dev, TPS65090_INT_MSK + i, &mask); - if (ret < 0) { - dev_err(tps65090->dev, - "failed to read mask reg [addr:%d]\n", - TPS65090_INT_MSK + i); - return IRQ_NONE; - } - ret = tps65090_read(tps65090->dev, TPS65090_INT_STS + i, - &status); - if (ret < 0) { - dev_err(tps65090->dev, - "failed to read status reg [addr:%d]\n", - TPS65090_INT_STS + i); - return IRQ_NONE; - } - if (status) { - /* Ack only those interrupts which are not masked */ - status &= (~mask); - ret = tps65090_write(tps65090->dev, - TPS65090_INT_STS + i, status); - if (ret < 0) { - dev_err(tps65090->dev, - "failed to write interrupt status\n"); - return IRQ_NONE; - } - acks |= (status << (i * 8)); - } - } - - for_each_set_bit(i, &acks, ARRAY_SIZE(tps65090_irqs)) - handle_nested_irq(tps65090->irq_base + i); - return acks ? IRQ_HANDLED : IRQ_NONE; -} - -static int __devinit tps65090_irq_init(struct tps65090 *tps65090, int irq, - int irq_base) -{ - int i, ret; - - if (!irq_base) { - dev_err(tps65090->dev, "IRQ base not set\n"); - return -EINVAL; - } - - mutex_init(&tps65090->irq_lock); - - for (i = 0; i < NUM_INT_REG; i++) - tps65090_write(tps65090->dev, TPS65090_INT_MSK + i, 0xFF); - - for (i = 0; i < NUM_INT_REG; i++) - tps65090_write(tps65090->dev, TPS65090_INT_STS + i, 0xff); - - tps65090->irq_base = irq_base; - tps65090->irq_chip.name = "tps65090"; - tps65090->irq_chip.irq_mask = tps65090_irq_mask; - tps65090->irq_chip.irq_unmask = tps65090_irq_unmask; - tps65090->irq_chip.irq_bus_lock = tps65090_irq_lock; - tps65090->irq_chip.irq_bus_sync_unlock = tps65090_irq_sync_unlock; - - for (i = 0; i < ARRAY_SIZE(tps65090_irqs); i++) { - int __irq = i + tps65090->irq_base; - irq_set_chip_data(__irq, tps65090); - irq_set_chip_and_handler(__irq, &tps65090->irq_chip, - handle_simple_irq); - irq_set_nested_thread(__irq, 1); -#ifdef CONFIG_ARM - set_irq_flags(__irq, IRQF_VALID); -#endif - } - - ret = request_threaded_irq(irq, NULL, tps65090_irq, IRQF_ONESHOT, - "tps65090", tps65090); - if (!ret) { - device_init_wakeup(tps65090->dev, 1); - enable_irq_wake(irq); - } +static const struct regmap_irq tps65090_irqs[] = { + /* INT1 IRQs*/ + [TPS65090_IRQ_VAC_STATUS_CHANGE] = { + .mask = TPS65090_INT1_MASK_VAC_STATUS_CHANGE, + }, + [TPS65090_IRQ_VSYS_STATUS_CHANGE] = { + .mask = TPS65090_INT1_MASK_VSYS_STATUS_CHANGE, + }, + [TPS65090_IRQ_BAT_STATUS_CHANGE] = { + .mask = TPS65090_INT1_MASK_BAT_STATUS_CHANGE, + }, + [TPS65090_IRQ_CHARGING_STATUS_CHANGE] = { + .mask = TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE, + }, + [TPS65090_IRQ_CHARGING_COMPLETE] = { + .mask = TPS65090_INT1_MASK_CHARGING_COMPLETE, + }, + [TPS65090_IRQ_OVERLOAD_DCDC1] = { + .mask = TPS65090_INT1_MASK_OVERLOAD_DCDC1, + }, + [TPS65090_IRQ_OVERLOAD_DCDC2] = { + .mask = TPS65090_INT1_MASK_OVERLOAD_DCDC2, + }, + /* INT2 IRQs*/ + [TPS65090_IRQ_OVERLOAD_DCDC3] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_DCDC3, + }, + [TPS65090_IRQ_OVERLOAD_FET1] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET1, + }, + [TPS65090_IRQ_OVERLOAD_FET2] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET2, + }, + [TPS65090_IRQ_OVERLOAD_FET3] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET3, + }, + [TPS65090_IRQ_OVERLOAD_FET4] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET4, + }, + [TPS65090_IRQ_OVERLOAD_FET5] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET5, + }, + [TPS65090_IRQ_OVERLOAD_FET6] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET6, + }, + [TPS65090_IRQ_OVERLOAD_FET7] = { + .reg_offset = 1, + .mask = TPS65090_INT2_MASK_OVERLOAD_FET7, + }, +}; - return ret; -} +static struct regmap_irq_chip tps65090_irq_chip = { + .name = "tps65090", + .irqs = tps65090_irqs, + .num_irqs = ARRAY_SIZE(tps65090_irqs), + .num_regs = NUM_INT_REG, + .status_base = TPS65090_INT_STS, + .mask_base = TPS65090_INT_MSK, + .mask_invert = true, +}; static bool is_volatile_reg(struct device *dev, unsigned int reg) { - if (reg == TPS65090_INT_STS) + if ((reg == TPS65090_INT_STS) || (reg == TPS65090_INT_STS2)) return true; else return false; @@ -238,24 +169,27 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client, tps65090->dev = &client->dev; i2c_set_clientdata(client, tps65090); - if (client->irq) { - ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base); - if (ret) { - dev_err(&client->dev, "IRQ init failed with err: %d\n", - ret); - goto err_exit; - } - } - tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config); if (IS_ERR(tps65090->rmap)) { ret = PTR_ERR(tps65090->rmap); dev_err(&client->dev, "regmap_init failed with err: %d\n", ret); - goto err_irq_exit; + return ret; + } + + if (client->irq) { + ret = regmap_add_irq_chip(tps65090->rmap, client->irq, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, pdata->irq_base, + &tps65090_irq_chip, &tps65090->irq_data); + if (ret) { + dev_err(&client->dev, + "IRQ init failed with err: %d\n", ret); + return ret; + } } ret = mfd_add_devices(tps65090->dev, -1, tps65090s, - ARRAY_SIZE(tps65090s), NULL, 0, NULL); + ARRAY_SIZE(tps65090s), NULL, + regmap_irq_chip_get_base(tps65090->irq_data), NULL); if (ret) { dev_err(&client->dev, "add mfd devices failed with err: %d\n", ret); @@ -266,8 +200,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client, err_irq_exit: if (client->irq) - free_irq(client->irq, tps65090); -err_exit: + regmap_del_irq_chip(client->irq, tps65090->irq_data); return ret; } @@ -277,7 +210,7 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client) mfd_remove_devices(tps65090->dev); if (client->irq) - free_irq(client->irq, tps65090); + regmap_del_irq_chip(client->irq, tps65090->irq_data); return 0; } diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 1a5f916b7383..4bbbb1350b91 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -25,12 +25,29 @@ #include #include +/* TPS65090 IRQs */ +enum { + TPS65090_IRQ_VAC_STATUS_CHANGE, + TPS65090_IRQ_VSYS_STATUS_CHANGE, + TPS65090_IRQ_BAT_STATUS_CHANGE, + TPS65090_IRQ_CHARGING_STATUS_CHANGE, + TPS65090_IRQ_CHARGING_COMPLETE, + TPS65090_IRQ_OVERLOAD_DCDC1, + TPS65090_IRQ_OVERLOAD_DCDC2, + TPS65090_IRQ_OVERLOAD_DCDC3, + TPS65090_IRQ_OVERLOAD_FET1, + TPS65090_IRQ_OVERLOAD_FET2, + TPS65090_IRQ_OVERLOAD_FET3, + TPS65090_IRQ_OVERLOAD_FET4, + TPS65090_IRQ_OVERLOAD_FET5, + TPS65090_IRQ_OVERLOAD_FET6, + TPS65090_IRQ_OVERLOAD_FET7, +}; + struct tps65090 { struct device *dev; struct regmap *rmap; - struct irq_chip irq_chip; - struct mutex irq_lock; - int irq_base; + struct regmap_irq_chip_data *irq_data; }; struct tps65090_platform_data { -- cgit v1.2.3 From c7b76dce8ac95fd464bfae741b830d407884c274 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 18 Nov 2012 18:36:20 +0200 Subject: mfd: Introduce retu-mfd driver Retu is a multi-function device found on Nokia Internet Tablets implementing at least watchdog, RTC, headset detection and power button functionality. This patch implements minimum functionality providing register access, IRQ handling and power off functions. Acked-by: Felipe Balbi Acked-by: Tony Lindgren Signed-off-by: Aaro Koskinen Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 ++ drivers/mfd/Makefile | 1 + drivers/mfd/retu-mfd.c | 264 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/retu.h | 22 ++++ 4 files changed, 296 insertions(+) create mode 100644 drivers/mfd/retu-mfd.c create mode 100644 include/linux/mfd/retu.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ca633df7c330..f5b839b718aa 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1094,6 +1094,15 @@ config MFD_VIPERBOARD You need to select the mfd cell drivers separately. The drivers do not support all features the board exposes. +config MFD_RETU + tristate "Support for Retu multi-function device" + select MFD_CORE + depends on I2C + select REGMAP_IRQ + help + Retu is a multi-function device found on Nokia Internet Tablets + (770, N800 and N810). + endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 8072460e99d2..2689c8a0d781 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -145,3 +145,4 @@ obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o obj-$(CONFIG_MFD_LM3533) += lm3533-core.o lm3533-ctrlbank.o +obj-$(CONFIG_MFD_RETU) += retu-mfd.o diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c new file mode 100644 index 000000000000..7ff4a37ab0c0 --- /dev/null +++ b/drivers/mfd/retu-mfd.c @@ -0,0 +1,264 @@ +/* + * Retu MFD driver + * + * Copyright (C) 2004, 2005 Nokia Corporation + * + * Based on code written by Juha Yrjölä, David Weinehall and Mikko Ylinen. + * Rewritten by Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define RETU_REG_ASICR 0x00 /* ASIC ID and revision */ +#define RETU_REG_ASICR_VILMA (1 << 7) /* Bit indicating Vilma */ +#define RETU_REG_IDR 0x01 /* Interrupt ID */ +#define RETU_REG_IMR 0x02 /* Interrupt mask */ + +/* Interrupt sources */ +#define RETU_INT_PWR 0 /* Power button */ + +struct retu_dev { + struct regmap *regmap; + struct device *dev; + struct mutex mutex; + struct regmap_irq_chip_data *irq_data; +}; + +static struct resource retu_pwrbutton_res[] = { + { + .name = "retu-pwrbutton", + .start = RETU_INT_PWR, + .end = RETU_INT_PWR, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell retu_devs[] = { + { + .name = "retu-wdt" + }, + { + .name = "retu-pwrbutton", + .resources = retu_pwrbutton_res, + .num_resources = ARRAY_SIZE(retu_pwrbutton_res), + } +}; + +static struct regmap_irq retu_irqs[] = { + [RETU_INT_PWR] = { + .mask = 1 << RETU_INT_PWR, + } +}; + +static struct regmap_irq_chip retu_irq_chip = { + .name = "RETU", + .irqs = retu_irqs, + .num_irqs = ARRAY_SIZE(retu_irqs), + .num_regs = 1, + .status_base = RETU_REG_IDR, + .mask_base = RETU_REG_IMR, + .ack_base = RETU_REG_IDR, +}; + +/* Retu device registered for the power off. */ +static struct retu_dev *retu_pm_power_off; + +int retu_read(struct retu_dev *rdev, u8 reg) +{ + int ret; + int value; + + mutex_lock(&rdev->mutex); + ret = regmap_read(rdev->regmap, reg, &value); + mutex_unlock(&rdev->mutex); + + return ret ? ret : value; +} +EXPORT_SYMBOL_GPL(retu_read); + +int retu_write(struct retu_dev *rdev, u8 reg, u16 data) +{ + int ret; + + mutex_lock(&rdev->mutex); + ret = regmap_write(rdev->regmap, reg, data); + mutex_unlock(&rdev->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(retu_write); + +static void retu_power_off(void) +{ + struct retu_dev *rdev = retu_pm_power_off; + int reg; + + mutex_lock(&retu_pm_power_off->mutex); + + /* Ignore power button state */ + regmap_read(rdev->regmap, RETU_REG_CC1, ®); + regmap_write(rdev->regmap, RETU_REG_CC1, reg | 2); + + /* Expire watchdog immediately */ + regmap_write(rdev->regmap, RETU_REG_WATCHDOG, 0); + + /* Wait for poweroff */ + for (;;) + cpu_relax(); + + mutex_unlock(&retu_pm_power_off->mutex); +} + +static int retu_regmap_read(void *context, const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + int ret; + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + + BUG_ON(reg_size != 1 || val_size != 2); + + ret = i2c_smbus_read_word_data(i2c, *(u8 const *)reg); + if (ret < 0) + return ret; + + *(u16 *)val = ret; + return 0; +} + +static int retu_regmap_write(void *context, const void *data, size_t count) +{ + u8 reg; + u16 val; + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + + BUG_ON(count != sizeof(reg) + sizeof(val)); + memcpy(®, data, sizeof(reg)); + memcpy(&val, data + sizeof(reg), sizeof(val)); + return i2c_smbus_write_word_data(i2c, reg, val); +} + +static struct regmap_bus retu_bus = { + .read = retu_regmap_read, + .write = retu_regmap_write, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static struct regmap_config retu_config = { + .reg_bits = 8, + .val_bits = 16, +}; + +static int __devinit retu_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct retu_dev *rdev; + int ret; + + rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL); + if (rdev == NULL) + return -ENOMEM; + + i2c_set_clientdata(i2c, rdev); + rdev->dev = &i2c->dev; + mutex_init(&rdev->mutex); + rdev->regmap = devm_regmap_init(&i2c->dev, &retu_bus, &i2c->dev, + &retu_config); + if (IS_ERR(rdev->regmap)) + return PTR_ERR(rdev->regmap); + + ret = retu_read(rdev, RETU_REG_ASICR); + if (ret < 0) { + dev_err(rdev->dev, "could not read Retu revision: %d\n", ret); + return ret; + } + + dev_info(rdev->dev, "Retu%s v%d.%d found\n", + (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "", + (ret >> 4) & 0x7, ret & 0xf); + + /* Mask all RETU interrupts. */ + ret = retu_write(rdev, RETU_REG_IMR, 0xffff); + if (ret < 0) + return ret; + + ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1, + &retu_irq_chip, &rdev->irq_data); + if (ret < 0) + return ret; + + ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs), + NULL, regmap_irq_chip_get_base(rdev->irq_data), + NULL); + if (ret < 0) { + regmap_del_irq_chip(i2c->irq, rdev->irq_data); + return ret; + } + + if (!pm_power_off) { + retu_pm_power_off = rdev; + pm_power_off = retu_power_off; + } + + return 0; +} + +static int __devexit retu_remove(struct i2c_client *i2c) +{ + struct retu_dev *rdev = i2c_get_clientdata(i2c); + + if (retu_pm_power_off == rdev) { + pm_power_off = NULL; + retu_pm_power_off = NULL; + } + mfd_remove_devices(rdev->dev); + regmap_del_irq_chip(i2c->irq, rdev->irq_data); + + return 0; +} + +static const struct i2c_device_id retu_id[] = { + { "retu-mfd", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, retu_id); + +static struct i2c_driver retu_driver = { + .driver = { + .name = "retu-mfd", + .owner = THIS_MODULE, + }, + .probe = retu_probe, + .remove = retu_remove, + .id_table = retu_id, +}; +module_i2c_driver(retu_driver); + +MODULE_DESCRIPTION("Retu MFD driver"); +MODULE_AUTHOR("Juha Yrjölä"); +MODULE_AUTHOR("David Weinehall"); +MODULE_AUTHOR("Mikko Ylinen"); +MODULE_AUTHOR("Aaro Koskinen "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h new file mode 100644 index 000000000000..1e2715d5b836 --- /dev/null +++ b/include/linux/mfd/retu.h @@ -0,0 +1,22 @@ +/* + * Retu MFD driver interface + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + */ + +#ifndef __LINUX_MFD_RETU_H +#define __LINUX_MFD_RETU_H + +struct retu_dev; + +int retu_read(struct retu_dev *, u8); +int retu_write(struct retu_dev *, u8, u16); + +/* Registers */ +#define RETU_REG_WATCHDOG 0x17 /* Watchdog */ +#define RETU_REG_CC1 0x0d /* Common control register 1 */ +#define RETU_REG_STATUS 0x16 /* Status register */ + +#endif /* __LINUX_MFD_RETU_H */ -- cgit v1.2.3 From 0e8f1398a388bbaa5ca965711b9ed5ac4794332d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Nov 2012 09:28:47 +0100 Subject: mfd: twl: Remove unused TWL_MODULE definitions AUDIO and MADC only available on twl4030 series and the TWL_MODULE_* mapping is not needed. Acked-by: Tero Kristo Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 9a5e28462324..7278c72479d1 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -73,9 +73,7 @@ #define TWL4030_MODULE_SECURED_REG 0x17 #define TWL_MODULE_USB TWL4030_MODULE_USB -#define TWL_MODULE_AUDIO_VOICE TWL4030_MODULE_AUDIO_VOICE #define TWL_MODULE_PIH TWL4030_MODULE_PIH -#define TWL_MODULE_MADC TWL4030_MODULE_MADC #define TWL_MODULE_MAIN_CHARGE TWL4030_MODULE_MAIN_CHARGE #define TWL_MODULE_PM_MASTER TWL4030_MODULE_PM_MASTER #define TWL_MODULE_PM_RECEIVER TWL4030_MODULE_PM_RECEIVER -- cgit v1.2.3 From da059ecfc9f9d98556607c6d6db065aa3b7f162d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Nov 2012 09:28:48 +0100 Subject: mfd: twl: Convert module id definitions to enums Use enum list for the module definitions (TWL4030_MODULE_*) which will ease up future work with the IDs. At the same time group the IDs in block of five so it is easier to find the ID we are looking for (to count the number they stand for). At the same time define TWL_MODULE_LED so client drivers can switch to use it as soon as it is possible. Acked-by: Tero Kristo Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 9 +++---- include/linux/i2c/twl.h | 65 +++++++++++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index d666c9d454cb..bb33b52cfa57 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -66,9 +66,6 @@ /* Triton Core internal information (BEGIN) */ -/* Last - for index max*/ -#define TWL4030_MODULE_LAST TWL4030_MODULE_SECURED_REG - #define TWL_NUM_SLAVES 4 #define SUB_CHIP_ID0 0 @@ -184,7 +181,7 @@ struct twl_mapping { }; static struct twl_mapping *twl_map; -static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = { +static struct twl_mapping twl4030_map[] = { /* * NOTE: don't change this table without updating the * defines for TWL4030_MODULE_* @@ -327,7 +324,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes) int sid; struct twl_client *twl; - if (unlikely(mod_no > TWL_MODULE_LAST)) { + if (unlikely(mod_no >= TWL_MODULE_LAST)) { pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no); return -EPERM; } @@ -369,7 +366,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes) int sid; struct twl_client *twl; - if (unlikely(mod_no > TWL_MODULE_LAST)) { + if (unlikely(mod_no >= TWL_MODULE_LAST)) { pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no); return -EPERM; } diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 7278c72479d1..b1c44cccef31 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -39,39 +39,39 @@ * address each module uses within a given i2c slave. */ -/* Slave 0 (i2c address 0x48) */ -#define TWL4030_MODULE_USB 0x00 - -/* Slave 1 (i2c address 0x49) */ -#define TWL4030_MODULE_AUDIO_VOICE 0x01 -#define TWL4030_MODULE_GPIO 0x02 -#define TWL4030_MODULE_INTBR 0x03 -#define TWL4030_MODULE_PIH 0x04 -#define TWL4030_MODULE_TEST 0x05 - -/* Slave 2 (i2c address 0x4a) */ -#define TWL4030_MODULE_KEYPAD 0x06 -#define TWL4030_MODULE_MADC 0x07 -#define TWL4030_MODULE_INTERRUPTS 0x08 -#define TWL4030_MODULE_LED 0x09 -#define TWL4030_MODULE_MAIN_CHARGE 0x0A -#define TWL4030_MODULE_PRECHARGE 0x0B -#define TWL4030_MODULE_PWM0 0x0C -#define TWL4030_MODULE_PWM1 0x0D -#define TWL4030_MODULE_PWMA 0x0E -#define TWL4030_MODULE_PWMB 0x0F - -#define TWL5031_MODULE_ACCESSORY 0x10 -#define TWL5031_MODULE_INTERRUPTS 0x11 - -/* Slave 3 (i2c address 0x4b) */ -#define TWL4030_MODULE_BACKUP 0x12 -#define TWL4030_MODULE_INT 0x13 -#define TWL4030_MODULE_PM_MASTER 0x14 -#define TWL4030_MODULE_PM_RECEIVER 0x15 -#define TWL4030_MODULE_RTC 0x16 -#define TWL4030_MODULE_SECURED_REG 0x17 +enum twl4030_module_ids { + TWL4030_MODULE_USB = 0, /* Slave 0 (i2c address 0x48) */ + TWL4030_MODULE_AUDIO_VOICE, /* Slave 1 (i2c address 0x49) */ + TWL4030_MODULE_GPIO, + TWL4030_MODULE_INTBR, + TWL4030_MODULE_PIH, + + TWL4030_MODULE_TEST, + TWL4030_MODULE_KEYPAD, /* Slave 2 (i2c address 0x4a) */ + TWL4030_MODULE_MADC, + TWL4030_MODULE_INTERRUPTS, + TWL4030_MODULE_LED, + + TWL4030_MODULE_MAIN_CHARGE, + TWL4030_MODULE_PRECHARGE, + TWL4030_MODULE_PWM0, + TWL4030_MODULE_PWM1, + TWL4030_MODULE_PWMA, + + TWL4030_MODULE_PWMB, + TWL5031_MODULE_ACCESSORY, + TWL5031_MODULE_INTERRUPTS, + TWL4030_MODULE_BACKUP, /* Slave 3 (i2c address 0x4b) */ + TWL4030_MODULE_INT, + + TWL4030_MODULE_PM_MASTER, + TWL4030_MODULE_PM_RECEIVER, + TWL4030_MODULE_RTC, + TWL4030_MODULE_SECURED_REG, + TWL4030_MODULE_LAST, +}; +/* Similar functionalities implemented in TWL4030/6030 */ #define TWL_MODULE_USB TWL4030_MODULE_USB #define TWL_MODULE_PIH TWL4030_MODULE_PIH #define TWL_MODULE_MAIN_CHARGE TWL4030_MODULE_MAIN_CHARGE @@ -79,6 +79,7 @@ #define TWL_MODULE_PM_RECEIVER TWL4030_MODULE_PM_RECEIVER #define TWL_MODULE_RTC TWL4030_MODULE_RTC #define TWL_MODULE_PWM TWL4030_MODULE_PWM0 +#define TWL_MODULE_LED TWL4030_MODULE_LED #define TWL6030_MODULE_ID0 0x0D #define TWL6030_MODULE_ID1 0x0E -- cgit v1.2.3 From 04575d5a3dc15d762c7428d2745d30b2d448e443 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Nov 2012 09:28:49 +0100 Subject: mfd: twl: Use decimal numbers for TWL6030_MODULE_IDs It is easier fro humans to understand decimal numbers than hexadecimals when they are used as indexes. Acked-by: Tero Kristo Signed-off-by: Peter Ujfalusi Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index b1c44cccef31..1ff54b114efc 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -81,9 +81,9 @@ enum twl4030_module_ids { #define TWL_MODULE_PWM TWL4030_MODULE_PWM0 #define TWL_MODULE_LED TWL4030_MODULE_LED -#define TWL6030_MODULE_ID0 0x0D -#define TWL6030_MODULE_ID1 0x0E -#define TWL6030_MODULE_ID2 0x0F +#define TWL6030_MODULE_ID0 13 +#define TWL6030_MODULE_ID1 14 +#define TWL6030_MODULE_ID2 15 #define GPIO_INTR_OFFSET 0 #define KEYPAD_INTR_OFFSET 1 -- cgit v1.2.3 From ef85fb28305fad7617f307383ebba554a3a891a2 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:27 +0000 Subject: bcma: add locking around GPIO register accesses The GPIOs are access through some registers in the chip common core. We need locking around these GPIO accesses, all GPIOs are accessed through the same registers and parallel writes will cause problems. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4585 Acked-by: Florian Fainelli --- drivers/bcma/driver_chipcommon.c | 47 ++++++++++++++++++++++++++--- include/linux/bcma/bcma_driver_chipcommon.h | 3 ++ 2 files changed, 45 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index a4c3ebcc4c86..c9b63d9ff61d 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -30,6 +30,8 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc) if (cc->setup_done) return; + spin_lock_init(&cc->gpio_lock); + if (cc->core->id.rev >= 11) cc->status = bcma_cc_read32(cc, BCMA_CC_CHIPSTAT); cc->capabilities = bcma_cc_read32(cc, BCMA_CC_CAP); @@ -84,28 +86,63 @@ u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask) u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value) { - return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value); + unsigned long flags; + u32 res; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value) { - return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value); + unsigned long flags; + u32 res; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value) { - return bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value); + unsigned long flags; + u32 res; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } EXPORT_SYMBOL_GPL(bcma_chipco_gpio_control); u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value) { - return bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value); + unsigned long flags; + u32 res; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value) { - return bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value); + unsigned long flags; + u32 res; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } #ifdef CONFIG_BCMA_DRIVER_MIPS diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 1cf1749440ac..a085d986c804 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -567,6 +567,9 @@ struct bcma_drv_cc { int nr_serial_ports; struct bcma_serial_port serial_ports[4]; #endif /* CONFIG_BCMA_DRIVER_MIPS */ + + /* Lock for GPIO register access. */ + spinlock_t gpio_lock; }; /* Register access */ -- cgit v1.2.3 From ea3488f4696fe22811eb19bee7088b732d3f8fe0 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:28 +0000 Subject: bcma: add bcma_chipco_gpio_pull{up,down} Add functions to access the GPIO registers for pullup and pulldown. These are needed for handling gpio registration. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4586 Acked-by: Florian Fainelli --- drivers/bcma/driver_chipcommon.c | 30 +++++++++++++++++++++++++++++ include/linux/bcma/bcma_driver_chipcommon.h | 2 ++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index c9b63d9ff61d..0945383c2271 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -145,6 +145,36 @@ u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value) return res; } +u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + unsigned long flags; + u32 res; + + if (cc->core->id.rev < 20) + return 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLUP, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; +} + +u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value) +{ + unsigned long flags; + u32 res; + + if (cc->core->id.rev < 20) + return 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLDOWN, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; +} + #ifdef CONFIG_BCMA_DRIVER_MIPS void bcma_chipco_serial_init(struct bcma_drv_cc *cc) { diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index a085d986c804..256702663b68 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -606,6 +606,8 @@ u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value); +u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value); /* PMU support */ extern void bcma_pmu_init(struct bcma_drv_cc *cc); -- cgit v1.2.3 From cf0936b06d8e98a157630e99f647e2ff6d29d7ad Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:30 +0000 Subject: bcma: add GPIO driver Register a GPIO driver to access the GPIOs provided by the chip. The GPIOs of the SoC should always start at 0 and the other GPIOs could start at a random position. There is just one SoC in a system and when they start at 0 the number is predictable. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4587 Acked-by: Florian Fainelli --- drivers/bcma/Kconfig | 9 +++ drivers/bcma/Makefile | 1 + drivers/bcma/bcma_private.h | 10 +++ drivers/bcma/driver_gpio.c | 98 +++++++++++++++++++++++++++++ drivers/bcma/main.c | 5 ++ include/linux/bcma/bcma_driver_chipcommon.h | 5 ++ 6 files changed, 128 insertions(+) create mode 100644 drivers/bcma/driver_gpio.c (limited to 'include/linux') diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index a533af218368..d7b56a88c9f4 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -65,6 +65,15 @@ config BCMA_DRIVER_GMAC_CMN If unsure, say N +config BCMA_DRIVER_GPIO + bool "BCMA GPIO driver" + depends on BCMA + select GPIOLIB + help + Driver to provide access to the GPIO pins of the bcma bus. + + If unsure, say N + config BCMA_DEBUG bool "BCMA debugging" depends on BCMA diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index 8ad42d41b2f2..734b32f09c0a 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -6,6 +6,7 @@ bcma-y += driver_pci.o bcma-$(CONFIG_BCMA_DRIVER_PCI_HOSTMODE) += driver_pci_host.o bcma-$(CONFIG_BCMA_DRIVER_MIPS) += driver_mips.o bcma-$(CONFIG_BCMA_DRIVER_GMAC_CMN) += driver_gmac_cmn.o +bcma-$(CONFIG_BCMA_DRIVER_GPIO) += driver_gpio.o bcma-$(CONFIG_BCMA_HOST_PCI) += host_pci.o bcma-$(CONFIG_BCMA_HOST_SOC) += host_soc.o obj-$(CONFIG_BCMA) += bcma.o diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 169fc58427d3..8cd80bff8e91 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -89,4 +89,14 @@ bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc); void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc); #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */ +#ifdef CONFIG_BCMA_DRIVER_GPIO +/* driver_gpio.c */ +int bcma_gpio_init(struct bcma_drv_cc *cc); +#else +static inline int bcma_gpio_init(struct bcma_drv_cc *cc) +{ + return -ENOTSUPP; +} +#endif /* CONFIG_BCMA_DRIVER_GPIO */ + #endif diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c new file mode 100644 index 000000000000..9a6f585da2d9 --- /dev/null +++ b/drivers/bcma/driver_gpio.c @@ -0,0 +1,98 @@ +/* + * Broadcom specific AMBA + * GPIO driver + * + * Copyright 2011, Broadcom Corporation + * Copyright 2012, Hauke Mehrtens + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include +#include +#include + +#include "bcma_private.h" + +static inline struct bcma_drv_cc *bcma_gpio_get_cc(struct gpio_chip *chip) +{ + return container_of(chip, struct bcma_drv_cc, gpio); +} + +static int bcma_gpio_get_value(struct gpio_chip *chip, unsigned gpio) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + return !!bcma_chipco_gpio_in(cc, 1 << gpio); +} + +static void bcma_gpio_set_value(struct gpio_chip *chip, unsigned gpio, + int value) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0); +} + +static int bcma_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + bcma_chipco_gpio_outen(cc, 1 << gpio, 0); + return 0; +} + +static int bcma_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, + int value) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + bcma_chipco_gpio_outen(cc, 1 << gpio, 1 << gpio); + bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0); + return 0; +} + +static int bcma_gpio_request(struct gpio_chip *chip, unsigned gpio) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + bcma_chipco_gpio_control(cc, 1 << gpio, 0); + /* clear pulldown */ + bcma_chipco_gpio_pulldown(cc, 1 << gpio, 0); + /* Set pullup */ + bcma_chipco_gpio_pullup(cc, 1 << gpio, 1 << gpio); + + return 0; +} + +static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio) +{ + struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip); + + /* clear pullup */ + bcma_chipco_gpio_pullup(cc, 1 << gpio, 0); +} + +int bcma_gpio_init(struct bcma_drv_cc *cc) +{ + struct gpio_chip *chip = &cc->gpio; + + chip->label = "bcma_gpio"; + chip->owner = THIS_MODULE; + chip->request = bcma_gpio_request; + chip->free = bcma_gpio_free; + chip->get = bcma_gpio_get_value; + chip->set = bcma_gpio_set_value; + chip->direction_input = bcma_gpio_direction_input; + chip->direction_output = bcma_gpio_direction_output; + chip->ngpio = 16; + /* There is just one SoC in one device and its GPIO addresses should be + * deterministic to address them more easily. The other buses could get + * a random base number. */ + if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC) + chip->base = 0; + else + chip->base = -1; + + return gpiochip_add(chip); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index d865470bc951..478ba01ca0c2 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -152,6 +152,11 @@ static int bcma_register_cores(struct bcma_bus *bus) bcma_err(bus, "Error registering NAND flash\n"); } #endif + err = bcma_gpio_init(&bus->drv_cc); + if (err == -ENOTSUPP) + bcma_debug(bus, "GPIO driver not activated\n"); + else if (err) + bcma_err(bus, "Error registering GPIO driver: %i\n", err); return 0; } diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 256702663b68..7d662a988d24 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -1,6 +1,8 @@ #ifndef LINUX_BCMA_DRIVER_CC_H_ #define LINUX_BCMA_DRIVER_CC_H_ +#include + /** ChipCommon core registers. **/ #define BCMA_CC_ID 0x0000 #define BCMA_CC_ID_ID 0x0000FFFF @@ -570,6 +572,9 @@ struct bcma_drv_cc { /* Lock for GPIO register access. */ spinlock_t gpio_lock; +#ifdef CONFIG_BCMA_DRIVER_GPIO + struct gpio_chip gpio; +#endif }; /* Register access */ -- cgit v1.2.3 From da22f22e91f0d14d996c7258101575a5a06ddf85 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:31 +0000 Subject: ssb: add ssb_chipco_gpio_pull{up,down} Add functions to access the GPIO registers for pullup and pulldown. These are needed for handling gpio registration. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4589 Acked-by: Florian Fainelli --- drivers/ssb/driver_chipcommon.c | 16 ++++++++++++++++ include/linux/ssb/ssb_driver_chipcommon.h | 2 ++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index e9d2ca11283b..4df492665565 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -442,6 +442,22 @@ u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value) return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value); } +u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value) +{ + if (cc->dev->id.revision < 20) + return 0xffffffff; + + return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value); +} + +u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value) +{ + if (cc->dev->id.revision < 20) + return 0xffffffff; + + return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value); +} + #ifdef CONFIG_SSB_SERIAL int ssb_chipco_serial_init(struct ssb_chipcommon *cc, struct ssb_serial_port *ports) diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index c2b02a5c86ae..c8d07c95d76e 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -644,6 +644,8 @@ u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value); u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value); u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value); u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value); +u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value); +u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value); #ifdef CONFIG_SSB_SERIAL extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc, -- cgit v1.2.3 From 394bc7e38be79987ed15de203920c3cddb724cc1 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:32 +0000 Subject: ssb: add locking around gpio register accesses The GPIOs are access through some registers in the chip common core or over extif. We need locking around these GPIO accesses, all GPIOs are accessed through the same registers and parallel writes will cause problems. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4590 Acked-by: Florian Fainelli --- drivers/ssb/driver_chipcommon.c | 66 +++++++++++++++++++++++++++---- drivers/ssb/driver_extif.c | 43 ++++++++++++++++++-- drivers/ssb/main.c | 1 + drivers/ssb/ssb_private.h | 8 ++++ include/linux/ssb/ssb_driver_chipcommon.h | 1 + include/linux/ssb/ssb_driver_extif.h | 1 + 6 files changed, 109 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c index 4df492665565..24e02bb2ecd8 100644 --- a/drivers/ssb/driver_chipcommon.c +++ b/drivers/ssb/driver_chipcommon.c @@ -284,6 +284,9 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc) { if (!cc->dev) return; /* We don't have a ChipCommon */ + + spin_lock_init(&cc->gpio_lock); + if (cc->dev->id.revision >= 11) cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT); ssb_dprintk(KERN_INFO PFX "chipcommon status is 0x%x\n", cc->status); @@ -418,44 +421,93 @@ u32 ssb_chipco_gpio_in(struct ssb_chipcommon *cc, u32 mask) u32 ssb_chipco_gpio_out(struct ssb_chipcommon *cc, u32 mask, u32 value) { - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value) { - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value) { - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } EXPORT_SYMBOL(ssb_chipco_gpio_control); u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value) { - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value) { - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value) { + unsigned long flags; + u32 res = 0; + if (cc->dev->id.revision < 20) return 0xffffffff; - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value); + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value) { + unsigned long flags; + u32 res = 0; + if (cc->dev->id.revision < 20) return 0xffffffff; - return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value); + spin_lock_irqsave(&cc->gpio_lock, flags); + res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value); + spin_unlock_irqrestore(&cc->gpio_lock, flags); + + return res; } #ifdef CONFIG_SSB_SERIAL diff --git a/drivers/ssb/driver_extif.c b/drivers/ssb/driver_extif.c index dc47f30e9cf7..e1d0bb8ad725 100644 --- a/drivers/ssb/driver_extif.c +++ b/drivers/ssb/driver_extif.c @@ -118,6 +118,13 @@ void ssb_extif_watchdog_timer_set(struct ssb_extif *extif, extif_write32(extif, SSB_EXTIF_WATCHDOG, ticks); } +void ssb_extif_init(struct ssb_extif *extif) +{ + if (!extif->dev) + return; /* We don't have a Extif core */ + spin_lock_init(&extif->gpio_lock); +} + u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask) { return extif_read32(extif, SSB_EXTIF_GPIO_IN) & mask; @@ -125,22 +132,50 @@ u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask) u32 ssb_extif_gpio_out(struct ssb_extif *extif, u32 mask, u32 value) { - return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0), + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&extif->gpio_lock, flags); + res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0), mask, value); + spin_unlock_irqrestore(&extif->gpio_lock, flags); + + return res; } u32 ssb_extif_gpio_outen(struct ssb_extif *extif, u32 mask, u32 value) { - return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0), + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&extif->gpio_lock, flags); + res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0), mask, value); + spin_unlock_irqrestore(&extif->gpio_lock, flags); + + return res; } u32 ssb_extif_gpio_polarity(struct ssb_extif *extif, u32 mask, u32 value) { - return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&extif->gpio_lock, flags); + res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value); + spin_unlock_irqrestore(&extif->gpio_lock, flags); + + return res; } u32 ssb_extif_gpio_intmask(struct ssb_extif *extif, u32 mask, u32 value) { - return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value); + unsigned long flags; + u32 res = 0; + + spin_lock_irqsave(&extif->gpio_lock, flags); + res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value); + spin_unlock_irqrestore(&extif->gpio_lock, flags); + + return res; } diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index df0f145c22fc..6fe2d102734a 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -796,6 +796,7 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus, if (err) goto err_pcmcia_exit; ssb_chipcommon_init(&bus->chipco); + ssb_extif_init(&bus->extif); ssb_mipscore_init(&bus->mipscore); err = ssb_fetch_invariants(bus, get_invariants); if (err) { diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index a305550b4b65..d6a1ba9394d9 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -211,4 +211,12 @@ static inline void b43_pci_ssb_bridge_exit(void) extern u32 ssb_pmu_get_cpu_clock(struct ssb_chipcommon *cc); extern u32 ssb_pmu_get_controlclock(struct ssb_chipcommon *cc); +#ifdef CONFIG_SSB_DRIVER_EXTIF +extern void ssb_extif_init(struct ssb_extif *extif); +#else +static inline void ssb_extif_init(struct ssb_extif *extif) +{ +} +#endif + #endif /* LINUX_SSB_PRIVATE_H_ */ diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index c8d07c95d76e..30b694345d47 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -590,6 +590,7 @@ struct ssb_chipcommon { u32 status; /* Fast Powerup Delay constant */ u16 fast_pwrup_delay; + spinlock_t gpio_lock; struct ssb_chipcommon_pmu pmu; }; diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h index 91161f0aa22b..bd2306854a91 100644 --- a/include/linux/ssb/ssb_driver_extif.h +++ b/include/linux/ssb/ssb_driver_extif.h @@ -158,6 +158,7 @@ struct ssb_extif { struct ssb_device *dev; + spinlock_t gpio_lock; }; static inline bool ssb_extif_available(struct ssb_extif *extif) -- cgit v1.2.3 From ec43b08b5733494ad88aa618ecdf534320dd8207 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 20 Nov 2012 22:24:33 +0000 Subject: ssb: add GPIO driver Register a GPIO driver to access the GPIOs provided by the chip. The GPIOs of the SoC should always start at 0 and the other GPIOs could start at a random position. There is just one SoC in a system and when they start at 0 the number is predictable. Signed-off-by: Hauke Mehrtens Patchwork: http://patchwork.linux-mips.org/patch/4591 Acked-by: Florian Fainelli --- drivers/ssb/Kconfig | 9 +++ drivers/ssb/Makefile | 1 + drivers/ssb/driver_gpio.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/ssb/main.c | 6 ++ drivers/ssb/ssb_private.h | 9 +++ include/linux/ssb/ssb.h | 4 ++ 6 files changed, 205 insertions(+) create mode 100644 drivers/ssb/driver_gpio.c (limited to 'include/linux') diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index 42cdaa9a4d8a..ff3c8a21f10d 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -160,4 +160,13 @@ config SSB_DRIVER_GIGE If unsure, say N +config SSB_DRIVER_GPIO + bool "SSB GPIO driver" + depends on SSB + select GPIOLIB + help + Driver to provide access to the GPIO pins on the bus. + + If unsure, say N + endmenu diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile index 656e58b92618..9159ba77c388 100644 --- a/drivers/ssb/Makefile +++ b/drivers/ssb/Makefile @@ -15,6 +15,7 @@ ssb-$(CONFIG_SSB_DRIVER_MIPS) += driver_mipscore.o ssb-$(CONFIG_SSB_DRIVER_EXTIF) += driver_extif.o ssb-$(CONFIG_SSB_DRIVER_PCICORE) += driver_pcicore.o ssb-$(CONFIG_SSB_DRIVER_GIGE) += driver_gige.o +ssb-$(CONFIG_SSB_DRIVER_GPIO) += driver_gpio.o # b43 pci-ssb-bridge driver # Not strictly a part of SSB, but kept here for convenience diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c new file mode 100644 index 000000000000..97ac0a38e3d0 --- /dev/null +++ b/drivers/ssb/driver_gpio.c @@ -0,0 +1,176 @@ +/* + * Sonics Silicon Backplane + * GPIO driver + * + * Copyright 2011, Broadcom Corporation + * Copyright 2012, Hauke Mehrtens + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include +#include +#include + +#include "ssb_private.h" + +static struct ssb_bus *ssb_gpio_get_bus(struct gpio_chip *chip) +{ + return container_of(chip, struct ssb_bus, gpio); +} + +static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio); +} + +static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio, + int value) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0); +} + +static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip, + unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 0); + return 0; +} + +static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip, + unsigned gpio, int value) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 1 << gpio); + ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0); + return 0; +} + +static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_chipco_gpio_control(&bus->chipco, 1 << gpio, 0); + /* clear pulldown */ + ssb_chipco_gpio_pulldown(&bus->chipco, 1 << gpio, 0); + /* Set pullup */ + ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 1 << gpio); + + return 0; +} + +static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + /* clear pullup */ + ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0); +} + +static int ssb_gpio_chipco_init(struct ssb_bus *bus) +{ + struct gpio_chip *chip = &bus->gpio; + + chip->label = "ssb_chipco_gpio"; + chip->owner = THIS_MODULE; + chip->request = ssb_gpio_chipco_request; + chip->free = ssb_gpio_chipco_free; + chip->get = ssb_gpio_chipco_get_value; + chip->set = ssb_gpio_chipco_set_value; + chip->direction_input = ssb_gpio_chipco_direction_input; + chip->direction_output = ssb_gpio_chipco_direction_output; + chip->ngpio = 16; + /* There is just one SoC in one device and its GPIO addresses should be + * deterministic to address them more easily. The other buses could get + * a random base number. */ + if (bus->bustype == SSB_BUSTYPE_SSB) + chip->base = 0; + else + chip->base = -1; + + return gpiochip_add(chip); +} + +#ifdef CONFIG_SSB_DRIVER_EXTIF + +static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio); +} + +static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio, + int value) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0); +} + +static int ssb_gpio_extif_direction_input(struct gpio_chip *chip, + unsigned gpio) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 0); + return 0; +} + +static int ssb_gpio_extif_direction_output(struct gpio_chip *chip, + unsigned gpio, int value) +{ + struct ssb_bus *bus = ssb_gpio_get_bus(chip); + + ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 1 << gpio); + ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0); + return 0; +} + +static int ssb_gpio_extif_init(struct ssb_bus *bus) +{ + struct gpio_chip *chip = &bus->gpio; + + chip->label = "ssb_extif_gpio"; + chip->owner = THIS_MODULE; + chip->get = ssb_gpio_extif_get_value; + chip->set = ssb_gpio_extif_set_value; + chip->direction_input = ssb_gpio_extif_direction_input; + chip->direction_output = ssb_gpio_extif_direction_output; + chip->ngpio = 5; + /* There is just one SoC in one device and its GPIO addresses should be + * deterministic to address them more easily. The other buses could get + * a random base number. */ + if (bus->bustype == SSB_BUSTYPE_SSB) + chip->base = 0; + else + chip->base = -1; + + return gpiochip_add(chip); +} + +#else +static int ssb_gpio_extif_init(struct ssb_bus *bus) +{ + return -ENOTSUPP; +} +#endif + +int ssb_gpio_init(struct ssb_bus *bus) +{ + if (ssb_chipco_available(&bus->chipco)) + return ssb_gpio_chipco_init(bus); + else if (ssb_extif_available(&bus->extif)) + return ssb_gpio_extif_init(bus); + else + SSB_WARN_ON(1); + + return -1; +} diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 6fe2d102734a..87f0ddf4f3f3 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -798,6 +798,12 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus, ssb_chipcommon_init(&bus->chipco); ssb_extif_init(&bus->extif); ssb_mipscore_init(&bus->mipscore); + err = ssb_gpio_init(bus); + if (err == -ENOTSUPP) + ssb_dprintk(KERN_DEBUG PFX "GPIO driver not activated\n"); + else if (err) + ssb_dprintk(KERN_ERR PFX + "Error registering GPIO driver: %i\n", err); err = ssb_fetch_invariants(bus, get_invariants); if (err) { ssb_bus_may_powerdown(bus); diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h index d6a1ba9394d9..463b76a2140b 100644 --- a/drivers/ssb/ssb_private.h +++ b/drivers/ssb/ssb_private.h @@ -219,4 +219,13 @@ static inline void ssb_extif_init(struct ssb_extif *extif) } #endif +#ifdef CONFIG_SSB_DRIVER_GPIO +extern int ssb_gpio_init(struct ssb_bus *bus); +#else /* CONFIG_SSB_DRIVER_GPIO */ +static inline int ssb_gpio_init(struct ssb_bus *bus) +{ + return -ENOTSUPP; +} +#endif /* CONFIG_SSB_DRIVER_GPIO */ + #endif /* LINUX_SSB_PRIVATE_H_ */ diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index bb674c02f306..3862a5bbd73b 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -433,6 +434,9 @@ struct ssb_bus { /* Lock for GPIO register access. */ spinlock_t gpio_lock; #endif /* EMBEDDED */ +#ifdef CONFIG_SSB_DRIVER_GPIO + struct gpio_chip gpio; +#endif /* DRIVER_GPIO */ /* Internal-only stuff follows. Do not touch. */ struct list_head list; -- cgit v1.2.3 From 743179849015dc71bb2ea63d8cd4bfa7fdfb4bc6 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Thu, 15 Nov 2012 20:19:57 +0100 Subject: of_spi: add generic binding support to specify cs gpio This will allow to use gpio for chip select with no modification in the driver binding When use the cs-gpios, the gpio number will be passed via the cs_gpio field and the number of chip select will automatically increased with max(hw cs, gpio cs). So if for example the controller has 2 CS lines, and the cs-gpios property looks like this: cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>; Then it should be configured so that num_chipselect = 4 with the following mapping: cs0 : &gpio1 0 0 cs1 : native cs2 : &gpio1 1 0 cs3 : &gpio1 2 0 Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Cc: devicetree-discuss@lists.ozlabs.org Cc: spi-devel-general@lists.sourceforge.net Signed-off-by: Richard Genoud [grant.likely: fixed up type of cs count so min() can do type checking] Signed-off-by: Grant Likely --- Documentation/devicetree/bindings/spi/spi-bus.txt | 20 +++++++++ drivers/spi/spi.c | 54 +++++++++++++++++++++-- include/linux/spi/spi.h | 3 ++ 3 files changed, 74 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/spi/spi-bus.txt b/Documentation/devicetree/bindings/spi/spi-bus.txt index d2c33d0f533e..77a8b0d39b54 100644 --- a/Documentation/devicetree/bindings/spi/spi-bus.txt +++ b/Documentation/devicetree/bindings/spi/spi-bus.txt @@ -12,6 +12,7 @@ The SPI master node requires the following properties: - #size-cells - should be zero. - compatible - name of SPI bus controller following generic names recommended practice. +- cs-gpios - (optional) gpios chip select. No other properties are required in the SPI bus node. It is assumed that a driver for an SPI bus device will understand that it is an SPI bus. However, the binding does not attempt to define the specific method for @@ -24,6 +25,22 @@ support describing the chip select layout. Optional property: - num-cs : total number of chipselects +If cs-gpios is used the number of chip select will automatically increased +with max(cs-gpios > hw cs) + +So if for example the controller has 2 CS lines, and the cs-gpios +property looks like this: + +cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>; + +Then it should be configured so that num_chipselect = 4 with the +following mapping: + +cs0 : &gpio1 0 0 +cs1 : native +cs2 : &gpio1 1 0 +cs3 : &gpio1 2 0 + SPI slave nodes must be children of the SPI master node and can contain the following properties. - reg - (required) chip select address of device. @@ -37,6 +54,9 @@ contain the following properties. - spi-cs-high - (optional) Empty property indicating device requires chip select active high +If a gpio chipselect is used for the SPI slave the gpio number will be passed +via the cs_gpio + SPI example for an MPC5200 SPI bus: spi@f00 { #address-cells = <1>; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 84c2861d6f4d..1587a4a5ff41 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -327,6 +328,7 @@ struct spi_device *spi_alloc_device(struct spi_master *master) spi->dev.parent = &master->dev; spi->dev.bus = &spi_bus_type; spi->dev.release = spidev_release; + spi->cs_gpio = -EINVAL; device_initialize(&spi->dev); return spi; } @@ -344,15 +346,16 @@ EXPORT_SYMBOL_GPL(spi_alloc_device); int spi_add_device(struct spi_device *spi) { static DEFINE_MUTEX(spi_add_lock); - struct device *dev = spi->master->dev.parent; + struct spi_master *master = spi->master; + struct device *dev = master->dev.parent; struct device *d; int status; /* Chipselects are numbered 0..max; validate. */ - if (spi->chip_select >= spi->master->num_chipselect) { + if (spi->chip_select >= master->num_chipselect) { dev_err(dev, "cs%d >= max %d\n", spi->chip_select, - spi->master->num_chipselect); + master->num_chipselect); return -EINVAL; } @@ -376,6 +379,9 @@ int spi_add_device(struct spi_device *spi) goto done; } + if (master->cs_gpios) + spi->cs_gpio = master->cs_gpios[spi->chip_select]; + /* Drivers may modify this initial i/o setup, but will * normally rely on the device being setup. Devices * using SPI_CS_HIGH can't coexist well otherwise... @@ -946,6 +952,44 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size) } EXPORT_SYMBOL_GPL(spi_alloc_master); +#ifdef CONFIG_OF +static int of_spi_register_master(struct spi_master *master) +{ + u16 nb; + int i, *cs; + struct device_node *np = master->dev.of_node; + + if (!np) + return 0; + + nb = of_gpio_named_count(np, "cs-gpios"); + master->num_chipselect = max(nb, master->num_chipselect); + + if (nb < 1) + return 0; + + cs = devm_kzalloc(&master->dev, + sizeof(int) * master->num_chipselect, + GFP_KERNEL); + master->cs_gpios = cs; + + if (!master->cs_gpios) + return -ENOMEM; + + memset(cs, -EINVAL, master->num_chipselect); + + for (i = 0; i < nb; i++) + cs[i] = of_get_named_gpio(np, "cs-gpios", i); + + return 0; +} +#else +static int of_spi_register_master(struct spi_master *master) +{ + return 0; +} +#endif + /** * spi_register_master - register SPI master controller * @master: initialized master, originally from spi_alloc_master() @@ -977,6 +1021,10 @@ int spi_register_master(struct spi_master *master) if (!dev) return -ENODEV; + status = of_spi_register_master(master); + if (status) + return status; + /* even if it's just one always-selected device, there must * be at least one chipselect */ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index fa702aeb5038..f62918946d86 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -90,6 +90,7 @@ struct spi_device { void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; + int cs_gpio; /* chip select gpio */ /* * likely need more hooks for more protocol options affecting how @@ -362,6 +363,8 @@ struct spi_master { int (*transfer_one_message)(struct spi_master *master, struct spi_message *mesg); int (*unprepare_transfer_hardware)(struct spi_master *master); + /* gpio chip select */ + int *cs_gpios; }; static inline void *spi_master_get_devdata(struct spi_master *master) -- cgit v1.2.3 From df072eb97dcfb819390227649f6b7c07a90aa2df Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 20 Nov 2012 19:22:33 +0900 Subject: extcon: kernel_doc style fix Corrected kernel_doc entries. Signed-off-by: MyungJoo Ham --- include/linux/extcon.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 54c00ce9ce7a..fcb51c88319f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -76,12 +76,12 @@ struct extcon_cable; /** * struct extcon_dev - An extcon device represents one external connector. - * @name The name of this extcon device. Parent device name is used + * @name: The name of this extcon device. Parent device name is used * if NULL. - * @supported_cable Array of supported cable names ending with NULL. + * @supported_cable: Array of supported cable names ending with NULL. * If supported_cable is NULL, cable name related APIs * are disabled. - * @mutually_exclusive Array of mutually exclusive set of cables that cannot + * @mutually_exclusive: Array of mutually exclusive set of cables that cannot * be attached simultaneously. The array should be * ending with NULL or be NULL (no mutually exclusive * cables). For example, if it is { 0x7, 0x30, 0}, then, @@ -89,21 +89,21 @@ struct extcon_cable; * be attached simulataneously. {0x7, 0} is equivalent to * {0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there * can be no simultaneous connections. - * @print_name An optional callback to override the method to print the + * @print_name: An optional callback to override the method to print the * name of the extcon device. - * @print_state An optional callback to override the method to print the + * @print_state: An optional callback to override the method to print the * status of the extcon device. - * @dev Device of this extcon. Do not provide at register-time. - * @state Attach/detach state of this extcon. Do not provide at + * @dev: Device of this extcon. Do not provide at register-time. + * @state: Attach/detach state of this extcon. Do not provide at * register-time - * @nh Notifier for the state change events from this extcon - * @entry To support list of extcon devices so that users can search + * @nh: Notifier for the state change events from this extcon + * @entry: To support list of extcon devices so that users can search * for extcon devices based on the extcon name. - * @lock - * @max_supported Internal value to store the number of cables. - * @extcon_dev_type Device_type struct to provide attribute_groups + * @lock: + * @max_supported: Internal value to store the number of cables. + * @extcon_dev_type: Device_type struct to provide attribute_groups * customized for each extcon device. - * @cables Sysfs subdirectories. Each represents one cable. + * @cables: Sysfs subdirectories. Each represents one cable. * * In most cases, users only need to provide "User initializing data" of * this struct when registering an extcon. In some exceptional cases, @@ -139,12 +139,12 @@ struct extcon_dev { /** * struct extcon_cable - An internal data for each cable of extcon device. - * @edev The extcon device - * @cable_index Index of this cable in the edev - * @attr_g Attribute group for the cable - * @attr_name "name" sysfs entry - * @attr_state "state" sysfs entry - * @attrs Array pointing to attr_name and attr_state for attr_g + * @edev: The extcon device + * @cable_index: Index of this cable in the edev + * @attr_g: Attribute group for the cable + * @attr_name: "name" sysfs entry + * @attr_state: "state" sysfs entry + * @attrs: Array pointing to attr_name and attr_state for attr_g */ struct extcon_cable { struct extcon_dev *edev; @@ -160,11 +160,11 @@ struct extcon_cable { /** * struct extcon_specific_cable_nb - An internal data for * extcon_register_interest(). - * @internal_nb a notifier block bridging extcon notifier and cable notifier. - * @user_nb user provided notifier block for events from a specific cable. - * @cable_index the target cable. - * @edev the target extcon device. - * @previous_value the saved previous event value. + * @internal_nb: a notifier block bridging extcon notifier and cable notifier. + * @user_nb: user provided notifier block for events from a specific cable. + * @cable_index: the target cable. + * @edev: the target extcon device. + * @previous_value: the saved previous event value. */ struct extcon_specific_cable_nb { struct notifier_block internal_nb; -- cgit v1.2.3 From 8d4b9e3182634d8b5afb5a144a8c6c24b187bcc1 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 12 Nov 2012 13:03:20 +0100 Subject: bcma: export PLL reading function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required by NAND flash driver for initializing wait counters. Signed-off-by: Rafał Miłecki Signed-off-by: Artem Bityutskiy --- drivers/bcma/driver_chipcommon_pmu.c | 3 ++- include/linux/bcma/bcma.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index 201faf106b3f..657f23517481 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -13,12 +13,13 @@ #include #include -static u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset) +u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset) { bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset); bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR); return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA); } +EXPORT_SYMBOL_GPL(bcma_chipco_pll_read); void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value) { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 4180eb78d575..4fb6bd7941d7 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -345,6 +345,7 @@ extern void bcma_core_set_clockmode(struct bcma_device *core, enum bcma_clkmode clkmode); extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status, bool on); +extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset); #define BCMA_DMA_TRANSLATION_MASK 0xC0000000 #define BCMA_DMA_TRANSLATION_NONE 0x00000000 #define BCMA_DMA_TRANSLATION_DMA32_CMT 0x40000000 /* Client Mode Translation for 32-bit DMA */ -- cgit v1.2.3 From 83af24027b3df1af5c5a9aa9adcdcfeb3429d3be Mon Sep 17 00:00:00 2001 From: "Philip, Avinash" Date: Wed, 21 Nov 2012 13:10:44 +0530 Subject: pwm: Device tree support for PWM polarity Add support for encoding PWM properties in bit encoded form with of_pwm_xlate_with_flags() function support. Platforms require platform specific PWM properties has to populate in 3rd cell of the pwm-specifier and PWM driver should also set .of_xlate support with this function. Currently PWM property polarity encoded in bit position 0 of the third cell in pwm-specifier. Signed-off-by: Philip, Avinash Acked-by: Grant Likely Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/pwm/pwm.txt | 17 +++++++++++++--- drivers/pwm/core.c | 28 +++++++++++++++++++++++++++ include/linux/pwm.h | 3 +++ 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt index 73ec962bfe8c..06e67247859a 100644 --- a/Documentation/devicetree/bindings/pwm/pwm.txt +++ b/Documentation/devicetree/bindings/pwm/pwm.txt @@ -37,10 +37,21 @@ device: pwm-names = "backlight"; }; +Note that in the example above, specifying the "pwm-names" is redundant +because the name "backlight" would be used as fallback anyway. + pwm-specifier typically encodes the chip-relative PWM number and the PWM -period in nanoseconds. Note that in the example above, specifying the -"pwm-names" is redundant because the name "backlight" would be used as -fallback anyway. +period in nanoseconds. + +Optionally, the pwm-specifier can encode a number of flags in a third cell: +- bit 0: PWM signal polarity (0: normal polarity, 1: inverse polarity) + +Example with optional PWM specifier for inverse polarity + + bl: backlight { + pwms = <&pwm 0 5000000 1>; + pwm-names = "backlight"; + }; 2) PWM controller nodes ----------------------- diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index f5acdaa52707..780cb6b8a8f0 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -32,6 +32,9 @@ #define MAX_PWMS 1024 +/* flags in the third cell of the DT PWM specifier */ +#define PWM_SPEC_POLARITY (1 << 0) + static DEFINE_MUTEX(pwm_lookup_lock); static LIST_HEAD(pwm_lookup_list); static DEFINE_MUTEX(pwm_lock); @@ -129,6 +132,31 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) return 0; } +struct pwm_device * +of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args) +{ + struct pwm_device *pwm; + + if (pc->of_pwm_n_cells < 3) + return ERR_PTR(-EINVAL); + + if (args->args[0] >= pc->npwm) + return ERR_PTR(-EINVAL); + + pwm = pwm_request_from_chip(pc, args->args[0], NULL); + if (IS_ERR(pwm)) + return pwm; + + pwm_set_period(pwm, args->args[1]); + + if (args->args[2] & PWM_SPEC_POLARITY) + pwm_set_polarity(pwm, PWM_POLARITY_INVERSED); + else + pwm_set_polarity(pwm, PWM_POLARITY_NORMAL); + + return pwm; +} + static struct pwm_device * of_pwm_simple_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) { diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 112b31436848..6d661f32e0e4 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -171,6 +171,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, + const struct of_phandle_args *args); + struct pwm_device *pwm_get(struct device *dev, const char *consumer); void pwm_put(struct pwm_device *pwm); -- cgit v1.2.3 From acad189b08456722ca4a8984218d6f38f4563cbc Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 22 Nov 2012 11:12:04 +0100 Subject: mfd: Add an AS3711 PMIC MFD driver AS3711 is a PMIC with multiple DCDC and LDO power supplies, GPIOs, an RTC, a battery charger and a general purpose ADC. This patch adds support for the MFD with support for a regulator driver and a backlight driver. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 ++ drivers/mfd/Makefile | 1 + drivers/mfd/as3711.c | 217 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/as3711.h | 126 ++++++++++++++++++++++++++ 4 files changed, 353 insertions(+) create mode 100644 drivers/mfd/as3711.c create mode 100644 include/linux/mfd/as3711.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f5b839b718aa..475c26696b95 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1103,6 +1103,15 @@ config MFD_RETU Retu is a multi-function device found on Nokia Internet Tablets (770, N800 and N810). +config MFD_AS3711 + bool "Support for AS3711" + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ + depends on I2C=y + help + Support for the AS3711 PMIC from AMS + endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 2689c8a0d781..f2216dfd963c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -146,3 +146,4 @@ obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o obj-$(CONFIG_MFD_LM3533) += lm3533-core.o lm3533-ctrlbank.o obj-$(CONFIG_MFD_RETU) += retu-mfd.o +obj-$(CONFIG_MFD_AS3711) += as3711.o diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c new file mode 100644 index 000000000000..e994c9691124 --- /dev/null +++ b/drivers/mfd/as3711.c @@ -0,0 +1,217 @@ +/* + * AS3711 PMIC MFC driver + * + * Copyright (C) 2012 Renesas Electronics Corporation + * Author: Guennadi Liakhovetski, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License as + * published by the Free Software Foundation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + AS3711_REGULATOR, + AS3711_BACKLIGHT, +}; + +/* + * Ok to have it static: it is only used during probing and multiple I2C devices + * cannot be probed simultaneously. Just make sure to avoid stale data. + */ +static struct mfd_cell as3711_subdevs[] = { + [AS3711_REGULATOR] = {.name = "as3711-regulator",}, + [AS3711_BACKLIGHT] = {.name = "as3711-backlight",}, +}; + +static bool as3711_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AS3711_GPIO_SIGNAL_IN: + case AS3711_INTERRUPT_STATUS_1: + case AS3711_INTERRUPT_STATUS_2: + case AS3711_INTERRUPT_STATUS_3: + case AS3711_CHARGER_STATUS_1: + case AS3711_CHARGER_STATUS_2: + case AS3711_REG_STATUS: + return true; + } + return false; +} + +static bool as3711_precious_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AS3711_INTERRUPT_STATUS_1: + case AS3711_INTERRUPT_STATUS_2: + case AS3711_INTERRUPT_STATUS_3: + return true; + } + return false; +} + +static bool as3711_readable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case AS3711_SD_1_VOLTAGE: + case AS3711_SD_2_VOLTAGE: + case AS3711_SD_3_VOLTAGE: + case AS3711_SD_4_VOLTAGE: + case AS3711_LDO_1_VOLTAGE: + case AS3711_LDO_2_VOLTAGE: + case AS3711_LDO_3_VOLTAGE: + case AS3711_LDO_4_VOLTAGE: + case AS3711_LDO_5_VOLTAGE: + case AS3711_LDO_6_VOLTAGE: + case AS3711_LDO_7_VOLTAGE: + case AS3711_LDO_8_VOLTAGE: + case AS3711_SD_CONTROL: + case AS3711_GPIO_SIGNAL_OUT: + case AS3711_GPIO_SIGNAL_IN: + case AS3711_SD_CONTROL_1: + case AS3711_SD_CONTROL_2: + case AS3711_CURR_CONTROL: + case AS3711_CURR1_VALUE: + case AS3711_CURR2_VALUE: + case AS3711_CURR3_VALUE: + case AS3711_STEPUP_CONTROL_1: + case AS3711_STEPUP_CONTROL_2: + case AS3711_STEPUP_CONTROL_4: + case AS3711_STEPUP_CONTROL_5: + case AS3711_REG_STATUS: + case AS3711_INTERRUPT_STATUS_1: + case AS3711_INTERRUPT_STATUS_2: + case AS3711_INTERRUPT_STATUS_3: + case AS3711_CHARGER_STATUS_1: + case AS3711_CHARGER_STATUS_2: + case AS3711_ASIC_ID_1: + case AS3711_ASIC_ID_2: + return true; + } + return false; +} + +static const struct regmap_config as3711_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .volatile_reg = as3711_volatile_reg, + .readable_reg = as3711_readable_reg, + .precious_reg = as3711_precious_reg, + .max_register = AS3711_MAX_REGS, + .num_reg_defaults_raw = AS3711_MAX_REGS, + .cache_type = REGCACHE_RBTREE, +}; + +static int as3711_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct as3711 *as3711; + struct as3711_platform_data *pdata = client->dev.platform_data; + unsigned int id1, id2; + int ret; + + if (!pdata) + dev_dbg(&client->dev, "Platform data not found\n"); + + as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL); + if (!as3711) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + + as3711->dev = &client->dev; + i2c_set_clientdata(client, as3711); + + if (client->irq) + dev_notice(&client->dev, "IRQ not supported yet\n"); + + as3711->regmap = devm_regmap_init_i2c(client, &as3711_regmap_config); + if (IS_ERR(as3711->regmap)) { + ret = PTR_ERR(as3711->regmap); + dev_err(&client->dev, "regmap initialization failed: %d\n", ret); + return ret; + } + + ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_1, &id1); + if (!ret) + ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_2, &id2); + if (ret < 0) { + dev_err(&client->dev, "regmap_read() failed: %d\n", ret); + return ret; + } + if (id1 != 0x8b) + return -ENODEV; + dev_info(as3711->dev, "AS3711 detected: %x:%x\n", id1, id2); + + /* We can reuse as3711_subdevs[], it will be copied in mfd_add_devices() */ + if (pdata) { + as3711_subdevs[AS3711_REGULATOR].platform_data = &pdata->regulator; + as3711_subdevs[AS3711_REGULATOR].pdata_size = sizeof(pdata->regulator); + as3711_subdevs[AS3711_BACKLIGHT].platform_data = &pdata->backlight; + as3711_subdevs[AS3711_BACKLIGHT].pdata_size = sizeof(pdata->backlight); + } else { + as3711_subdevs[AS3711_REGULATOR].platform_data = NULL; + as3711_subdevs[AS3711_REGULATOR].pdata_size = 0; + as3711_subdevs[AS3711_BACKLIGHT].platform_data = NULL; + as3711_subdevs[AS3711_BACKLIGHT].pdata_size = 0; + } + + ret = mfd_add_devices(as3711->dev, -1, as3711_subdevs, + ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL); + if (ret < 0) + dev_err(&client->dev, "add mfd devices failed: %d\n", ret); + + return ret; +} + +static int as3711_i2c_remove(struct i2c_client *client) +{ + struct as3711 *as3711 = i2c_get_clientdata(client); + + mfd_remove_devices(as3711->dev); + return 0; +} + +static const struct i2c_device_id as3711_i2c_id[] = { + {.name = "as3711", .driver_data = 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, as3711_i2c_id); + +static struct i2c_driver as3711_i2c_driver = { + .driver = { + .name = "as3711", + .owner = THIS_MODULE, + }, + .probe = as3711_i2c_probe, + .remove = as3711_i2c_remove, + .id_table = as3711_i2c_id, +}; + +static int __init as3711_i2c_init(void) +{ + return i2c_add_driver(&as3711_i2c_driver); +} +/* Initialise early */ +subsys_initcall(as3711_i2c_init); + +static void __exit as3711_i2c_exit(void) +{ + i2c_del_driver(&as3711_i2c_driver); +} +module_exit(as3711_i2c_exit); + +MODULE_AUTHOR("Guennadi Liakhovetski "); +MODULE_DESCRIPTION("AS3711 PMIC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/as3711.h b/include/linux/mfd/as3711.h new file mode 100644 index 000000000000..38452ce1e892 --- /dev/null +++ b/include/linux/mfd/as3711.h @@ -0,0 +1,126 @@ +/* + * AS3711 PMIC MFC driver header + * + * Copyright (C) 2012 Renesas Electronics Corporation + * Author: Guennadi Liakhovetski, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License as + * published by the Free Software Foundation + */ + +#ifndef MFD_AS3711_H +#define MFD_AS3711_H + +/* + * Client data + */ + +/* Register addresses */ +#define AS3711_SD_1_VOLTAGE 0 /* Digital Step-Down */ +#define AS3711_SD_2_VOLTAGE 1 +#define AS3711_SD_3_VOLTAGE 2 +#define AS3711_SD_4_VOLTAGE 3 +#define AS3711_LDO_1_VOLTAGE 4 /* Analog LDO */ +#define AS3711_LDO_2_VOLTAGE 5 +#define AS3711_LDO_3_VOLTAGE 6 /* Digital LDO */ +#define AS3711_LDO_4_VOLTAGE 7 +#define AS3711_LDO_5_VOLTAGE 8 +#define AS3711_LDO_6_VOLTAGE 9 +#define AS3711_LDO_7_VOLTAGE 0xa +#define AS3711_LDO_8_VOLTAGE 0xb +#define AS3711_SD_CONTROL 0x10 +#define AS3711_GPIO_SIGNAL_OUT 0x20 +#define AS3711_GPIO_SIGNAL_IN 0x21 +#define AS3711_SD_CONTROL_1 0x30 +#define AS3711_SD_CONTROL_2 0x31 +#define AS3711_CURR_CONTROL 0x40 +#define AS3711_CURR1_VALUE 0x43 +#define AS3711_CURR2_VALUE 0x44 +#define AS3711_CURR3_VALUE 0x45 +#define AS3711_STEPUP_CONTROL_1 0x50 +#define AS3711_STEPUP_CONTROL_2 0x51 +#define AS3711_STEPUP_CONTROL_4 0x53 +#define AS3711_STEPUP_CONTROL_5 0x54 +#define AS3711_REG_STATUS 0x73 +#define AS3711_INTERRUPT_STATUS_1 0x77 +#define AS3711_INTERRUPT_STATUS_2 0x78 +#define AS3711_INTERRUPT_STATUS_3 0x79 +#define AS3711_CHARGER_STATUS_1 0x86 +#define AS3711_CHARGER_STATUS_2 0x87 +#define AS3711_ASIC_ID_1 0x90 +#define AS3711_ASIC_ID_2 0x91 + +#define AS3711_MAX_REGS 0x92 + +/* Regulators */ +enum { + AS3711_REGULATOR_SD_1, + AS3711_REGULATOR_SD_2, + AS3711_REGULATOR_SD_3, + AS3711_REGULATOR_SD_4, + AS3711_REGULATOR_LDO_1, + AS3711_REGULATOR_LDO_2, + AS3711_REGULATOR_LDO_3, + AS3711_REGULATOR_LDO_4, + AS3711_REGULATOR_LDO_5, + AS3711_REGULATOR_LDO_6, + AS3711_REGULATOR_LDO_7, + AS3711_REGULATOR_LDO_8, + + AS3711_REGULATOR_MAX, +}; + +struct device; +struct regmap; + +struct as3711 { + struct device *dev; + struct regmap *regmap; +}; + +#define AS3711_MAX_STEPDOWN 4 +#define AS3711_MAX_STEPUP 2 +#define AS3711_MAX_LDO 8 + +enum as3711_su2_feedback { + AS3711_SU2_VOLTAGE, + AS3711_SU2_CURR1, + AS3711_SU2_CURR2, + AS3711_SU2_CURR3, + AS3711_SU2_CURR_AUTO, +}; + +enum as3711_su2_fbprot { + AS3711_SU2_LX_SD4, + AS3711_SU2_GPIO2, + AS3711_SU2_GPIO3, + AS3711_SU2_GPIO4, +}; + +/* + * Platform data + */ + +struct as3711_regulator_pdata { + struct regulator_init_data *init_data[AS3711_REGULATOR_MAX]; +}; + +struct as3711_bl_pdata { + const char *su1_fb; + int su1_max_uA; + const char *su2_fb; + int su2_max_uA; + enum as3711_su2_feedback su2_feedback; + enum as3711_su2_fbprot su2_fbprot; + bool su2_auto_curr1; + bool su2_auto_curr2; + bool su2_auto_curr3; +}; + +struct as3711_platform_data { + struct as3711_regulator_pdata regulator; + struct as3711_bl_pdata backlight; +}; + +#endif -- cgit v1.2.3 From 1ad7e89940d5ac411928189e1a4a01901dbf590f Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 8 Nov 2012 16:12:25 -0800 Subject: block: store partition_meta_info.uuid as a string This will allow other types of UUID to be stored here, aside from true UUIDs. This also simplifies code that uses this field, since it's usually constructed from a, used as a, or compared to other, strings. Note: A simplistic approach here would be to set uuid_str[36]=0 whenever a /PARTNROFF option was found to be present. However, this modifies the input string, and causes subsequent calls to devt_from_partuuid() not to see the /PARTNROFF option, which causes different results. In order to avoid misleading future maintainers, this parameter is marked const. Signed-off-by: Stephen Warren Cc: Tejun Heo Cc: Will Drewry Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/genhd.c | 8 +------- block/partitions/efi.c | 7 +------ include/linux/genhd.h | 8 ++++++-- init/do_mounts.c | 28 +++++++++++++++++----------- 4 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 6cace663a80e..b281f3a2d26a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -743,7 +743,6 @@ void __init printk_all_partitions(void) struct hd_struct *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; - char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5]; /* * Don't show empty devices or things that have been @@ -762,16 +761,11 @@ void __init printk_all_partitions(void) while ((part = disk_part_iter_next(&piter))) { bool is_part0 = part == &disk->part0; - uuid_buf[0] = '\0'; - if (part->info) - snprintf(uuid_buf, sizeof(uuid_buf), "%pU", - part->info->uuid); - printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part_nr_sects_read(part) >> 1 , disk_name(disk, part->partno, name_buf), - uuid_buf); + part->info ? part->info->uuid : ""); if (is_part0) { if (disk->driverfs_dev != NULL && disk->driverfs_dev->driver != NULL) diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 6296b403c67a..b62fb88b8711 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state) gpt_entry *ptes = NULL; u32 i; unsigned ssz = bdev_logical_block_size(state->bdev) / 512; - u8 unparsed_guid[37]; if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { kfree(gpt); @@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state) state->parts[i + 1].flags = ADDPART_FLAG_RAID; info = &state->parts[i + 1].info; - /* Instead of doing a manual swap to big endian, reuse the - * common ASCII hex format as the interim. - */ - efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid); - part_pack_uuid(unparsed_guid, info->uuid); + efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ label_max = min(sizeof(info->volname) - 1, diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4f440b3e89fe..79b8bba19363 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -88,10 +88,14 @@ struct disk_stats { }; #define PARTITION_META_INFO_VOLNAMELTH 64 -#define PARTITION_META_INFO_UUIDLTH 16 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH 37 struct partition_meta_info { - u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + char uuid[PARTITION_META_INFO_UUIDLTH]; u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; diff --git a/init/do_mounts.c b/init/do_mounts.c index f8a66424360d..b28ec5819325 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -69,23 +69,28 @@ __setup("ro", readonly); __setup("rw", readwrite); #ifdef CONFIG_BLOCK +struct uuidcmp { + const char *uuid; + int len; +}; + /** * match_dev_by_uuid - callback for finding a partition using its uuid * @dev: device passed in by the caller - * @data: opaque pointer to a 36 byte char array with a UUID + * @data: opaque pointer to the desired struct uuidcmp to match * * Returns 1 if the device matches, and 0 otherwise. */ static int match_dev_by_uuid(struct device *dev, void *data) { - u8 *uuid = data; + struct uuidcmp *cmp = data; struct hd_struct *part = dev_to_part(dev); if (!part->info) goto no_match; - if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid))) - goto no_match; + if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) + goto no_match; return 1; no_match: @@ -95,7 +100,7 @@ no_match: /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID - * @uuid: min 36 byte char array containing a hex ascii UUID + * @uuid: char array containing ascii UUID * * The function will return the first partition which contains a matching * UUID value in its partition_meta_info struct. This does not search @@ -106,11 +111,11 @@ no_match: * * Returns the matching dev_t on success or 0 on failure. */ -static dev_t devt_from_partuuid(char *uuid_str) +static dev_t devt_from_partuuid(const char *uuid_str) { dev_t res = 0; + struct uuidcmp cmp; struct device *dev = NULL; - u8 uuid[16]; struct gendisk *disk; struct hd_struct *part; int offset = 0; @@ -118,6 +123,9 @@ static dev_t devt_from_partuuid(char *uuid_str) if (strlen(uuid_str) < 36) goto done; + cmp.uuid = uuid_str; + cmp.len = 36; + /* Check for optional partition number offset attributes. */ if (uuid_str[36]) { char c = 0; @@ -134,10 +142,8 @@ static dev_t devt_from_partuuid(char *uuid_str) } } - /* Pack the requested UUID in the expected format. */ - part_pack_uuid(uuid_str, uuid); - - dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid); + dev = class_find_device(&block_class, NULL, &cmp, + &match_dev_by_uuid); if (!dev) goto done; -- cgit v1.2.3 From 3f9be35bd9090eaa2f68ed9b24efdbf3abcf4b28 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 20 Nov 2012 10:34:56 +0800 Subject: mfd: rc5t583: Fix array subscript is above array bounds I got below build warning while compiling this driver. It's obviously RC5T583_MAX_INTERRUPT_MASK_REGS is 9 but irq_en_add array only has 8 elements. CC drivers/mfd/rc5t583-irq.o drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_sync_unlock': drivers/mfd/rc5t583-irq.c:227: warning: array subscript is above array bounds drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_init': drivers/mfd/rc5t583-irq.c:349: warning: array subscript is above array bounds Since the number of interrupt enable registers is 8, this patch adds define for RC5T583_MAX_INTERRUPT_EN_REGS to fix this bug. Signed-off-by: Axel Lin Acked-by: Laxman Dewangan Signed-off-by: Samuel Ortiz --- drivers/mfd/rc5t583-irq.c | 2 +- include/linux/mfd/rc5t583.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rc5t583-irq.c b/drivers/mfd/rc5t583-irq.c index fe00cdd6f83d..b41db5968706 100644 --- a/drivers/mfd/rc5t583-irq.c +++ b/drivers/mfd/rc5t583-irq.c @@ -345,7 +345,7 @@ int rc5t583_irq_init(struct rc5t583 *rc5t583, int irq, int irq_base) mutex_init(&rc5t583->irq_lock); /* Initailize all int register to 0 */ - for (i = 0; i < RC5T583_MAX_INTERRUPT_MASK_REGS; i++) { + for (i = 0; i < RC5T583_MAX_INTERRUPT_EN_REGS; i++) { ret = rc5t583_write(rc5t583->dev, irq_en_add[i], rc5t583->irq_en_reg[i]); if (ret < 0) diff --git a/include/linux/mfd/rc5t583.h b/include/linux/mfd/rc5t583.h index 36c242e52ef1..fd413ccab915 100644 --- a/include/linux/mfd/rc5t583.h +++ b/include/linux/mfd/rc5t583.h @@ -33,6 +33,7 @@ /* Maximum number of main interrupts */ #define MAX_MAIN_INTERRUPT 5 #define RC5T583_MAX_GPEDGE_REG 2 +#define RC5T583_MAX_INTERRUPT_EN_REGS 8 #define RC5T583_MAX_INTERRUPT_MASK_REGS 9 /* Interrupt enable register */ @@ -304,7 +305,7 @@ struct rc5t583 { uint8_t intc_inten_reg; /* For group interrupt bits and address */ - uint8_t irq_en_reg[RC5T583_MAX_INTERRUPT_MASK_REGS]; + uint8_t irq_en_reg[RC5T583_MAX_INTERRUPT_EN_REGS]; /* For gpio edge */ uint8_t gpedge_reg[RC5T583_MAX_GPEDGE_REG]; -- cgit v1.2.3 From df1590d9ae5e37e07e7cf91107e4c2c946ce8bf4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 12 Nov 2012 22:56:03 +0530 Subject: ARM: SPEAr3xx: Shirq: Move shirq controller out of plat/ This patch moves shirq interrupt controllers driver and header file out of plat-spear directory. It is moved to drivers/irqchip/ directory. Signed-off-by: Viresh Kumar --- arch/arm/mach-spear3xx/spear3xx.c | 2 +- arch/arm/plat-spear/Makefile | 2 +- arch/arm/plat-spear/include/plat/shirq.h | 66 ------- arch/arm/plat-spear/shirq.c | 315 ------------------------------ drivers/irqchip/Makefile | 3 +- drivers/irqchip/spear-shirq.c | 316 +++++++++++++++++++++++++++++++ include/linux/irqchip/spear-shirq.h | 64 +++++++ 7 files changed, 384 insertions(+), 384 deletions(-) delete mode 100644 arch/arm/plat-spear/include/plat/shirq.h delete mode 100644 arch/arm/plat-spear/shirq.c create mode 100644 drivers/irqchip/spear-shirq.c create mode 100644 include/linux/irqchip/spear-shirq.h (limited to 'include/linux') diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c index f1aaf5b168b2..38fe95db31a7 100644 --- a/arch/arm/mach-spear3xx/spear3xx.c +++ b/arch/arm/mach-spear3xx/spear3xx.c @@ -15,12 +15,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/arm/plat-spear/Makefile b/arch/arm/plat-spear/Makefile index 2607bd05c525..01e88532a5db 100644 --- a/arch/arm/plat-spear/Makefile +++ b/arch/arm/plat-spear/Makefile @@ -5,5 +5,5 @@ # Common support obj-y := restart.o time.o -obj-$(CONFIG_ARCH_SPEAR3XX) += pl080.o shirq.o +obj-$(CONFIG_ARCH_SPEAR3XX) += pl080.o obj-$(CONFIG_ARCH_SPEAR6XX) += pl080.o diff --git a/arch/arm/plat-spear/include/plat/shirq.h b/arch/arm/plat-spear/include/plat/shirq.h deleted file mode 100644 index c51b355f00de..000000000000 --- a/arch/arm/plat-spear/include/plat/shirq.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * arch/arm/plat-spear/include/plat/shirq.h - * - * SPEAr platform shared irq layer header file - * - * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __PLAT_SHIRQ_H -#define __PLAT_SHIRQ_H - -#include -#include - -/* - * struct shirq_regs: shared irq register configuration - * - * enb_reg: enable register offset - * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt - * status_reg: status register offset - * status_reg_mask: status register valid mask - * clear_reg: clear register offset - * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt - */ -struct shirq_regs { - u32 enb_reg; - u32 reset_to_enb; - u32 status_reg; - u32 clear_reg; - u32 reset_to_clear; -}; - -/* - * struct spear_shirq: shared irq structure - * - * irq: hardware irq number - * irq_base: base irq in linux domain - * irq_nr: no. of shared interrupts in a particular block - * irq_bit_off: starting bit offset in the status register - * invalid_irq: irq group is currently disabled - * base: base address of shared irq register - * regs: register configuration for shared irq block - */ -struct spear_shirq { - u32 irq; - u32 irq_base; - u32 irq_nr; - u32 irq_bit_off; - int invalid_irq; - void __iomem *base; - struct shirq_regs regs; -}; - -int __init spear300_shirq_of_init(struct device_node *np, - struct device_node *parent); -int __init spear310_shirq_of_init(struct device_node *np, - struct device_node *parent); -int __init spear320_shirq_of_init(struct device_node *np, - struct device_node *parent); - -#endif /* __PLAT_SHIRQ_H */ diff --git a/arch/arm/plat-spear/shirq.c b/arch/arm/plat-spear/shirq.c deleted file mode 100644 index 955c7249a5c1..000000000000 --- a/arch/arm/plat-spear/shirq.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * arch/arm/plat-spear/shirq.c - * - * SPEAr platform shared irq layer source file - * - * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(lock); - -/* spear300 shared irq registers offsets and masks */ -#define SPEAR300_INT_ENB_MASK_REG 0x54 -#define SPEAR300_INT_STS_MASK_REG 0x58 - -static struct spear_shirq spear300_shirq_ras1 = { - .irq_nr = 9, - .irq_bit_off = 0, - .regs = { - .enb_reg = SPEAR300_INT_ENB_MASK_REG, - .status_reg = SPEAR300_INT_STS_MASK_REG, - .clear_reg = -1, - }, -}; - -static struct spear_shirq *spear300_shirq_blocks[] = { - &spear300_shirq_ras1, -}; - -/* spear310 shared irq registers offsets and masks */ -#define SPEAR310_INT_STS_MASK_REG 0x04 - -static struct spear_shirq spear310_shirq_ras1 = { - .irq_nr = 8, - .irq_bit_off = 0, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR310_INT_STS_MASK_REG, - .clear_reg = -1, - }, -}; - -static struct spear_shirq spear310_shirq_ras2 = { - .irq_nr = 5, - .irq_bit_off = 8, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR310_INT_STS_MASK_REG, - .clear_reg = -1, - }, -}; - -static struct spear_shirq spear310_shirq_ras3 = { - .irq_nr = 1, - .irq_bit_off = 13, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR310_INT_STS_MASK_REG, - .clear_reg = -1, - }, -}; - -static struct spear_shirq spear310_shirq_intrcomm_ras = { - .irq_nr = 3, - .irq_bit_off = 14, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR310_INT_STS_MASK_REG, - .clear_reg = -1, - }, -}; - -static struct spear_shirq *spear310_shirq_blocks[] = { - &spear310_shirq_ras1, - &spear310_shirq_ras2, - &spear310_shirq_ras3, - &spear310_shirq_intrcomm_ras, -}; - -/* spear320 shared irq registers offsets and masks */ -#define SPEAR320_INT_STS_MASK_REG 0x04 -#define SPEAR320_INT_CLR_MASK_REG 0x04 -#define SPEAR320_INT_ENB_MASK_REG 0x08 - -static struct spear_shirq spear320_shirq_ras1 = { - .irq_nr = 3, - .irq_bit_off = 7, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR320_INT_STS_MASK_REG, - .clear_reg = SPEAR320_INT_CLR_MASK_REG, - .reset_to_clear = 1, - }, -}; - -static struct spear_shirq spear320_shirq_ras2 = { - .irq_nr = 1, - .irq_bit_off = 10, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR320_INT_STS_MASK_REG, - .clear_reg = SPEAR320_INT_CLR_MASK_REG, - .reset_to_clear = 1, - }, -}; - -static struct spear_shirq spear320_shirq_ras3 = { - .irq_nr = 3, - .irq_bit_off = 0, - .invalid_irq = 1, - .regs = { - .enb_reg = SPEAR320_INT_ENB_MASK_REG, - .reset_to_enb = 1, - .status_reg = SPEAR320_INT_STS_MASK_REG, - .clear_reg = SPEAR320_INT_CLR_MASK_REG, - .reset_to_clear = 1, - }, -}; - -static struct spear_shirq spear320_shirq_intrcomm_ras = { - .irq_nr = 11, - .irq_bit_off = 11, - .regs = { - .enb_reg = -1, - .status_reg = SPEAR320_INT_STS_MASK_REG, - .clear_reg = SPEAR320_INT_CLR_MASK_REG, - .reset_to_clear = 1, - }, -}; - -static struct spear_shirq *spear320_shirq_blocks[] = { - &spear320_shirq_ras3, - &spear320_shirq_ras1, - &spear320_shirq_ras2, - &spear320_shirq_intrcomm_ras, -}; - -static void shirq_irq_mask_unmask(struct irq_data *d, bool mask) -{ - struct spear_shirq *shirq = irq_data_get_irq_chip_data(d); - u32 val, offset = d->irq - shirq->irq_base; - unsigned long flags; - - if (shirq->regs.enb_reg == -1) - return; - - spin_lock_irqsave(&lock, flags); - val = readl(shirq->base + shirq->regs.enb_reg); - - if (mask ^ shirq->regs.reset_to_enb) - val &= ~(0x1 << shirq->irq_bit_off << offset); - else - val |= 0x1 << shirq->irq_bit_off << offset; - - writel(val, shirq->base + shirq->regs.enb_reg); - spin_unlock_irqrestore(&lock, flags); - -} - -static void shirq_irq_mask(struct irq_data *d) -{ - shirq_irq_mask_unmask(d, 1); -} - -static void shirq_irq_unmask(struct irq_data *d) -{ - shirq_irq_mask_unmask(d, 0); -} - -static struct irq_chip shirq_chip = { - .name = "spear-shirq", - .irq_ack = shirq_irq_mask, - .irq_mask = shirq_irq_mask, - .irq_unmask = shirq_irq_unmask, -}; - -static void shirq_handler(unsigned irq, struct irq_desc *desc) -{ - u32 i, j, val, mask, tmp; - struct irq_chip *chip; - struct spear_shirq *shirq = irq_get_handler_data(irq); - - chip = irq_get_chip(irq); - chip->irq_ack(&desc->irq_data); - - mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off; - while ((val = readl(shirq->base + shirq->regs.status_reg) & - mask)) { - - val >>= shirq->irq_bit_off; - for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) { - - if (!(j & val)) - continue; - - generic_handle_irq(shirq->irq_base + i); - - /* clear interrupt */ - if (shirq->regs.clear_reg == -1) - continue; - - tmp = readl(shirq->base + shirq->regs.clear_reg); - if (shirq->regs.reset_to_clear) - tmp &= ~(j << shirq->irq_bit_off); - else - tmp |= (j << shirq->irq_bit_off); - writel(tmp, shirq->base + shirq->regs.clear_reg); - } - } - chip->irq_unmask(&desc->irq_data); -} - -static void __init spear_shirq_register(struct spear_shirq *shirq) -{ - int i; - - if (shirq->invalid_irq) - return; - - irq_set_chained_handler(shirq->irq, shirq_handler); - for (i = 0; i < shirq->irq_nr; i++) { - irq_set_chip_and_handler(shirq->irq_base + i, - &shirq_chip, handle_simple_irq); - set_irq_flags(shirq->irq_base + i, IRQF_VALID); - irq_set_chip_data(shirq->irq_base + i, shirq); - } - - irq_set_handler_data(shirq->irq, shirq); -} - -static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr, - struct device_node *np) -{ - int i, irq_base, hwirq = 0, irq_nr = 0; - static struct irq_domain *shirq_domain; - void __iomem *base; - - base = of_iomap(np, 0); - if (!base) { - pr_err("%s: failed to map shirq registers\n", __func__); - return -ENXIO; - } - - for (i = 0; i < block_nr; i++) - irq_nr += shirq_blocks[i]->irq_nr; - - irq_base = irq_alloc_descs(-1, 0, irq_nr, 0); - if (IS_ERR_VALUE(irq_base)) { - pr_err("%s: irq desc alloc failed\n", __func__); - goto err_unmap; - } - - shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0, - &irq_domain_simple_ops, NULL); - if (WARN_ON(!shirq_domain)) { - pr_warn("%s: irq domain init failed\n", __func__); - goto err_free_desc; - } - - for (i = 0; i < block_nr; i++) { - shirq_blocks[i]->base = base; - shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain, - hwirq); - shirq_blocks[i]->irq = irq_of_parse_and_map(np, i); - - spear_shirq_register(shirq_blocks[i]); - hwirq += shirq_blocks[i]->irq_nr; - } - - return 0; - -err_free_desc: - irq_free_descs(irq_base, irq_nr); -err_unmap: - iounmap(base); - return -ENXIO; -} - -int __init spear300_shirq_of_init(struct device_node *np, - struct device_node *parent) -{ - return shirq_init(spear300_shirq_blocks, - ARRAY_SIZE(spear300_shirq_blocks), np); -} - -int __init spear310_shirq_of_init(struct device_node *np, - struct device_node *parent) -{ - return shirq_init(spear310_shirq_blocks, - ARRAY_SIZE(spear310_shirq_blocks), np); -} - -int __init spear320_shirq_of_init(struct device_node *np, - struct device_node *parent) -{ - return shirq_init(spear320_shirq_blocks, - ARRAY_SIZE(spear320_shirq_blocks), np); -} diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 054321db4350..bf7b43696cdb 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -1 +1,2 @@ -obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o +obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o +obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c new file mode 100644 index 000000000000..80e1d2fd9d4c --- /dev/null +++ b/drivers/irqchip/spear-shirq.c @@ -0,0 +1,316 @@ +/* + * SPEAr platform shared irq layer source file + * + * Copyright (C) 2009-2012 ST Microelectronics + * Viresh Kumar + * + * Copyright (C) 2012 ST Microelectronics + * Shiraz Hashim + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(lock); + +/* spear300 shared irq registers offsets and masks */ +#define SPEAR300_INT_ENB_MASK_REG 0x54 +#define SPEAR300_INT_STS_MASK_REG 0x58 + +static struct spear_shirq spear300_shirq_ras1 = { + .irq_nr = 9, + .irq_bit_off = 0, + .regs = { + .enb_reg = SPEAR300_INT_ENB_MASK_REG, + .status_reg = SPEAR300_INT_STS_MASK_REG, + .clear_reg = -1, + }, +}; + +static struct spear_shirq *spear300_shirq_blocks[] = { + &spear300_shirq_ras1, +}; + +/* spear310 shared irq registers offsets and masks */ +#define SPEAR310_INT_STS_MASK_REG 0x04 + +static struct spear_shirq spear310_shirq_ras1 = { + .irq_nr = 8, + .irq_bit_off = 0, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR310_INT_STS_MASK_REG, + .clear_reg = -1, + }, +}; + +static struct spear_shirq spear310_shirq_ras2 = { + .irq_nr = 5, + .irq_bit_off = 8, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR310_INT_STS_MASK_REG, + .clear_reg = -1, + }, +}; + +static struct spear_shirq spear310_shirq_ras3 = { + .irq_nr = 1, + .irq_bit_off = 13, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR310_INT_STS_MASK_REG, + .clear_reg = -1, + }, +}; + +static struct spear_shirq spear310_shirq_intrcomm_ras = { + .irq_nr = 3, + .irq_bit_off = 14, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR310_INT_STS_MASK_REG, + .clear_reg = -1, + }, +}; + +static struct spear_shirq *spear310_shirq_blocks[] = { + &spear310_shirq_ras1, + &spear310_shirq_ras2, + &spear310_shirq_ras3, + &spear310_shirq_intrcomm_ras, +}; + +/* spear320 shared irq registers offsets and masks */ +#define SPEAR320_INT_STS_MASK_REG 0x04 +#define SPEAR320_INT_CLR_MASK_REG 0x04 +#define SPEAR320_INT_ENB_MASK_REG 0x08 + +static struct spear_shirq spear320_shirq_ras1 = { + .irq_nr = 3, + .irq_bit_off = 7, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR320_INT_STS_MASK_REG, + .clear_reg = SPEAR320_INT_CLR_MASK_REG, + .reset_to_clear = 1, + }, +}; + +static struct spear_shirq spear320_shirq_ras2 = { + .irq_nr = 1, + .irq_bit_off = 10, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR320_INT_STS_MASK_REG, + .clear_reg = SPEAR320_INT_CLR_MASK_REG, + .reset_to_clear = 1, + }, +}; + +static struct spear_shirq spear320_shirq_ras3 = { + .irq_nr = 3, + .irq_bit_off = 0, + .invalid_irq = 1, + .regs = { + .enb_reg = SPEAR320_INT_ENB_MASK_REG, + .reset_to_enb = 1, + .status_reg = SPEAR320_INT_STS_MASK_REG, + .clear_reg = SPEAR320_INT_CLR_MASK_REG, + .reset_to_clear = 1, + }, +}; + +static struct spear_shirq spear320_shirq_intrcomm_ras = { + .irq_nr = 11, + .irq_bit_off = 11, + .regs = { + .enb_reg = -1, + .status_reg = SPEAR320_INT_STS_MASK_REG, + .clear_reg = SPEAR320_INT_CLR_MASK_REG, + .reset_to_clear = 1, + }, +}; + +static struct spear_shirq *spear320_shirq_blocks[] = { + &spear320_shirq_ras3, + &spear320_shirq_ras1, + &spear320_shirq_ras2, + &spear320_shirq_intrcomm_ras, +}; + +static void shirq_irq_mask_unmask(struct irq_data *d, bool mask) +{ + struct spear_shirq *shirq = irq_data_get_irq_chip_data(d); + u32 val, offset = d->irq - shirq->irq_base; + unsigned long flags; + + if (shirq->regs.enb_reg == -1) + return; + + spin_lock_irqsave(&lock, flags); + val = readl(shirq->base + shirq->regs.enb_reg); + + if (mask ^ shirq->regs.reset_to_enb) + val &= ~(0x1 << shirq->irq_bit_off << offset); + else + val |= 0x1 << shirq->irq_bit_off << offset; + + writel(val, shirq->base + shirq->regs.enb_reg); + spin_unlock_irqrestore(&lock, flags); + +} + +static void shirq_irq_mask(struct irq_data *d) +{ + shirq_irq_mask_unmask(d, 1); +} + +static void shirq_irq_unmask(struct irq_data *d) +{ + shirq_irq_mask_unmask(d, 0); +} + +static struct irq_chip shirq_chip = { + .name = "spear-shirq", + .irq_ack = shirq_irq_mask, + .irq_mask = shirq_irq_mask, + .irq_unmask = shirq_irq_unmask, +}; + +static void shirq_handler(unsigned irq, struct irq_desc *desc) +{ + u32 i, j, val, mask, tmp; + struct irq_chip *chip; + struct spear_shirq *shirq = irq_get_handler_data(irq); + + chip = irq_get_chip(irq); + chip->irq_ack(&desc->irq_data); + + mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off; + while ((val = readl(shirq->base + shirq->regs.status_reg) & + mask)) { + + val >>= shirq->irq_bit_off; + for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) { + + if (!(j & val)) + continue; + + generic_handle_irq(shirq->irq_base + i); + + /* clear interrupt */ + if (shirq->regs.clear_reg == -1) + continue; + + tmp = readl(shirq->base + shirq->regs.clear_reg); + if (shirq->regs.reset_to_clear) + tmp &= ~(j << shirq->irq_bit_off); + else + tmp |= (j << shirq->irq_bit_off); + writel(tmp, shirq->base + shirq->regs.clear_reg); + } + } + chip->irq_unmask(&desc->irq_data); +} + +static void __init spear_shirq_register(struct spear_shirq *shirq) +{ + int i; + + if (shirq->invalid_irq) + return; + + irq_set_chained_handler(shirq->irq, shirq_handler); + for (i = 0; i < shirq->irq_nr; i++) { + irq_set_chip_and_handler(shirq->irq_base + i, + &shirq_chip, handle_simple_irq); + set_irq_flags(shirq->irq_base + i, IRQF_VALID); + irq_set_chip_data(shirq->irq_base + i, shirq); + } + + irq_set_handler_data(shirq->irq, shirq); +} + +static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr, + struct device_node *np) +{ + int i, irq_base, hwirq = 0, irq_nr = 0; + static struct irq_domain *shirq_domain; + void __iomem *base; + + base = of_iomap(np, 0); + if (!base) { + pr_err("%s: failed to map shirq registers\n", __func__); + return -ENXIO; + } + + for (i = 0; i < block_nr; i++) + irq_nr += shirq_blocks[i]->irq_nr; + + irq_base = irq_alloc_descs(-1, 0, irq_nr, 0); + if (IS_ERR_VALUE(irq_base)) { + pr_err("%s: irq desc alloc failed\n", __func__); + goto err_unmap; + } + + shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0, + &irq_domain_simple_ops, NULL); + if (WARN_ON(!shirq_domain)) { + pr_warn("%s: irq domain init failed\n", __func__); + goto err_free_desc; + } + + for (i = 0; i < block_nr; i++) { + shirq_blocks[i]->base = base; + shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain, + hwirq); + shirq_blocks[i]->irq = irq_of_parse_and_map(np, i); + + spear_shirq_register(shirq_blocks[i]); + hwirq += shirq_blocks[i]->irq_nr; + } + + return 0; + +err_free_desc: + irq_free_descs(irq_base, irq_nr); +err_unmap: + iounmap(base); + return -ENXIO; +} + +int __init spear300_shirq_of_init(struct device_node *np, + struct device_node *parent) +{ + return shirq_init(spear300_shirq_blocks, + ARRAY_SIZE(spear300_shirq_blocks), np); +} + +int __init spear310_shirq_of_init(struct device_node *np, + struct device_node *parent) +{ + return shirq_init(spear310_shirq_blocks, + ARRAY_SIZE(spear310_shirq_blocks), np); +} + +int __init spear320_shirq_of_init(struct device_node *np, + struct device_node *parent) +{ + return shirq_init(spear320_shirq_blocks, + ARRAY_SIZE(spear320_shirq_blocks), np); +} diff --git a/include/linux/irqchip/spear-shirq.h b/include/linux/irqchip/spear-shirq.h new file mode 100644 index 000000000000..c8be16d213a3 --- /dev/null +++ b/include/linux/irqchip/spear-shirq.h @@ -0,0 +1,64 @@ +/* + * SPEAr platform shared irq layer header file + * + * Copyright (C) 2009-2012 ST Microelectronics + * Viresh Kumar + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __SPEAR_SHIRQ_H +#define __SPEAR_SHIRQ_H + +#include +#include + +/* + * struct shirq_regs: shared irq register configuration + * + * enb_reg: enable register offset + * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt + * status_reg: status register offset + * status_reg_mask: status register valid mask + * clear_reg: clear register offset + * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt + */ +struct shirq_regs { + u32 enb_reg; + u32 reset_to_enb; + u32 status_reg; + u32 clear_reg; + u32 reset_to_clear; +}; + +/* + * struct spear_shirq: shared irq structure + * + * irq: hardware irq number + * irq_base: base irq in linux domain + * irq_nr: no. of shared interrupts in a particular block + * irq_bit_off: starting bit offset in the status register + * invalid_irq: irq group is currently disabled + * base: base address of shared irq register + * regs: register configuration for shared irq block + */ +struct spear_shirq { + u32 irq; + u32 irq_base; + u32 irq_nr; + u32 irq_bit_off; + int invalid_irq; + void __iomem *base; + struct shirq_regs regs; +}; + +int __init spear300_shirq_of_init(struct device_node *np, + struct device_node *parent); +int __init spear310_shirq_of_init(struct device_node *np, + struct device_node *parent); +int __init spear320_shirq_of_init(struct device_node *np, + struct device_node *parent); + +#endif /* __SPEAR_SHIRQ_H */ -- cgit v1.2.3 From 08ff32352d6ff7083533dc1c25618d42f92ec28e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 21 Oct 2012 14:59:24 +0000 Subject: mlx4: 64-byte CQE/EQE support ConnectX-3 devices can use either 64- or 32-byte completion queue entries (CQEs) and event queue entries (EQEs). Using 64-byte EQEs/CQEs performs better because each entry is aligned to a complete cacheline. This patch queries the HCA's capabilities, and if it supports 64-byte CQEs and EQES the driver will configure the HW to work in 64-byte mode. The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ. Since this mode is global, userspace (libmlx4) must be updated to work with the configured CQE size, and guests using SR-IOV virtual functions need to know both EQE and CQE size. In case one of the 64-byte CQE/EQE capabilities is activated, the patch makes sure that older guest drivers that use the QUERY_DEV_FUNC command (e.g as done in mlx4_core of Linux 3.3..3.6) will notice that they need an update to be able to work with the PPF. This is done by changing the returned pf_context_behaviour not to be zero any more. In case none of these capabilities is activated that value remains zero and older guest drivers can run OK. The SRIOV related flow is as follows 1. the PPF does the detection of the new capabilities using QUERY_DEV_CAP command. 2. the PPF activates the new capabilities using INIT_HCA. 3. the VF detects if the PPF activated the capabilities using QUERY_HCA, and if this is the case activates them for itself too. Note that the VF detects that it must be aware to the new PF behaviour using QUERY_FUNC_CAP. Steps 1 and 2 apply also for native mode. User space notification is done through a new field introduced in struct mlx4_ib_ucontext which holds device capabilities for which user space must take action. This changes the binary interface so the ABI towards libmlx4 exposed through uverbs is bumped from 3 to 4 but only when **needed** i.e. only when the driver does use 64-byte CQEs or future device capabilities which must be in sync by user space. This practice allows to work with unmodified libmlx4 on older devices (e.g A0, B0) which don't support 64-byte CQEs. In order to keep existing systems functional when they update to a newer kernel that contains these changes in VF and userspace ABI, a module parameter enable_64b_cqe_eqe must be set to enable 64-byte mode; the default is currently false. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 34 +++++++++++++++++------ drivers/infiniband/hw/mlx4/main.c | 27 ++++++++++++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/user.h | 12 +++++++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 ++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 5 ++-- drivers/net/ethernet/mellanox/mlx4/eq.c | 26 ++++++++++++------ drivers/net/ethernet/mellanox/mlx4/fw.c | 30 +++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 38 +++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 21 ++++++++++++++ 15 files changed, 175 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index c9eb6a6815ce..ae67df35dd4d 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) { - return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); + return mlx4_buf_offset(&buf->buf, n * buf->entry_size); } static void *get_cqe(struct mlx4_ib_cq *cq, int n) @@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n) static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) { struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe); - return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; } @@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * { int err; - err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), + err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, PAGE_SIZE * 2, &buf->buf); if (err) goto out; + buf->entry_size = dev->dev->caps.cqe_size; err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, &buf->mtt); if (err) @@ -120,8 +122,7 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &buf->mtt); err_buf: - mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), - &buf->buf); + mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); out: return err; @@ -129,7 +130,7 @@ out: static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) { - mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); + mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, @@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont u64 buf_addr, int cqe) { int err; + int cqe_size = dev->dev->caps.cqe_size; - *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { struct mlx4_cqe *cqe, *new_cqe; int i; + int cqe_size = cq->buf.entry_size; + int cqe_inc = cqe_size == 64 ? 1 : 0; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); + cqe += cqe_inc; + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, (i + 1) & cq->resize_buf->cqe); - memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); + new_cqe += cqe_inc; + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + cqe += cqe_inc; } ++cq->mcq.cons_index; } @@ -438,6 +447,7 @@ err_buf: out: mutex_unlock(&cq->resize_mutex); + return err; } @@ -586,6 +596,9 @@ repoll: if (!cqe) return -EAGAIN; + if (cq->buf.entry_size == 64) + cqe++; + ++cq->mcq.cons_index; /* @@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) int nfreed = 0; struct mlx4_cqe *cqe, *dest; u8 owner_bit; + int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0; /* * First we need to find the current producer index, so we @@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe += cqe_inc; + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest += cqe_inc; + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; memcpy(dest, cqe, sizeof *cqe); dest->owner_sr_opcode = owner_bit | diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 718ec6b2bad2..e7d81c0d1ac5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); - resp.qp_tab_size = dev->dev->caps.num_qps; - resp.bf_reg_size = dev->dev->caps.bf_reg_size; - resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { + resp_v3.qp_tab_size = dev->dev->caps.num_qps; + resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; + resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + } else { + resp.dev_caps = dev->dev->caps.userspace_caps; + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + resp.cqe_size = dev->dev->caps.cqe_size; + } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) @@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) + err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); + else + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); @@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + if (dev->caps.userspace_caps) + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + else + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; + ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e04cbc9a54a5..dcd845bc30f0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -90,6 +90,7 @@ struct mlx4_ib_xrcd { struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; + int entry_size; }; struct mlx4_ib_cq_resize { diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index 13beedeeef9f..07e6769ef43b 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -40,7 +40,9 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 3 + +#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 +#define MLX4_IB_UVERBS_ABI_VERSION 4 /* * Make sure that all structs defined in this file remain laid out so @@ -50,10 +52,18 @@ * instead. */ +struct mlx4_ib_alloc_ucontext_resp_v3 { + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; +}; + struct mlx4_ib_alloc_ucontext_resp { + __u32 dev_caps; __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; + __u32 cqe_size; }; struct mlx4_ib_alloc_pd_resp { diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3d1899ff1076..e791e705f7b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) spin_lock_init(&s_state->lock); } - memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); + memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index aa9c2f6cf3c0..b8d0854a7ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, int err; cq->size = entries; - cq->buf_size = cq->size * sizeof(struct mlx4_cqe); + cq->buf_size = cq->size * mdev->dev->caps.cqe_size; cq->ring = ring; cq->is_tx = mode; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edd9cb8d3e1d..93a325669582 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, goto out; } priv->rx_ring_num = prof->rx_ring_num; + priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; spin_lock_init(&priv->stats_lock); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5aba5ecdf1e2..6fa106f6c0ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct ethhdr *ethh; dma_addr_t dma; u64 s_mac; + int factor = priv->cqe_factor; if (!priv->port_up) return 0; @@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, @@ -709,7 +710,7 @@ next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; - cqe = &cq->buf[index]; + cqe = &cq->buf[(index << factor) + factor]; if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index b35094c590ba..25c157abdd92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; + int factor = priv->cqe_factor; if (!priv->port_up) return; index = cons_index & size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; /* Process all completed CQEs */ @@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) ++cons_index; index = cons_index & size_mask; - cqe = &buf[index]; + cqe = &buf[(index << factor) + factor]; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index b84a88bc44dc..c509a86db610 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not) mb(); } -static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry) +static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor) { - unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE; - return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; + /* (entry & (eq->nent - 1)) gives us a cyclic array */ + unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor); + /* CX3 is capable of extending the EQE from 32 to 64 bytes. + * When this feature is enabled, the first (in the lower addresses) + * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes + * contain the legacy EQE information. + */ + return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE; } -static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq) +static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor) { - struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index); + struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor); return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; } @@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); + memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ wmb(); @@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int i; enum slave_port_gen_event gen_event; - while ((eqe = next_eqe_sw(eq))) { + while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); - npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int err; - int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE; int i; + /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */ + int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4f30b99324cf..9a9de51ecc91 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [42] = "Multicast VEP steering support", [48] = "Counters support", [59] = "Port management change event support", + [61] = "64 byte EQE support", + [62] = "64 byte CQE support", }; int i; @@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - size = 0; /* no PF behaviour is set for now */ + size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ @@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { + *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); @@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; int err; + u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 @@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } + /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ + MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); + if (byte_field & 0x20) /* 64-bytes eqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; + if (byte_field & 0x40) /* 64-bytes cqe enabled */ + param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; + /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 85abe9c11a22..2c2e7ade2a34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u64 dev_cap_enabled; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2aa80afd98d2..4337f685175d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); +static bool enable_64b_cqe_eqe; +module_param(enable_64b_cqe_eqe, bool, 0444); +MODULE_PARM_DESC(enable_64b_cqe_eqe, + "Enable 64 byte CQEs/EQEs when the the FW supports this"); + #define HCA_GLOBAL_CAP_MASK 0 -#define PF_CONTEXT_BEHAVIOUR_MASK 0 + +#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" @@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; + + if (!enable_64b_cqe_eqe) { + if (dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { + mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; + } + } + + if ((dev_cap->flags & + (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; + return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + dev->caps.eqe_size = 64; + dev->caps.eqe_factor = 1; + } else { + dev->caps.eqe_size = 32; + dev->caps.eqe_factor = 0; + } + + if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + dev->caps.cqe_size = 64; + dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; + } else { + dev->caps.cqe_size = 32; + } + return 0; err_mem: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d27e42264e2..73b5c2ac5bd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -487,6 +487,7 @@ struct mlx4_en_priv { int mac_index; unsigned max_mtu; int base_qpn; + int cqe_factor; struct mlx4_en_rss_map rss_map; __be32 ctrl_flags; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6d1acb04cd17..21821da2abfd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -142,6 +142,8 @@ enum { MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, + MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, + MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62 }; enum { @@ -151,6 +153,20 @@ enum { MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 }; +enum { + MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, + MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1 +}; + +enum { + MLX4_USER_DEV_CAP_64B_CQE = 1L << 0 +}; + +enum { + MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0 +}; + + #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { @@ -419,6 +435,11 @@ struct mlx4_caps { u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; u16 sqp_demux; + u32 eqe_size; + u32 cqe_size; + u8 eqe_factor; + u32 userspace_caps; /* userspace must be aware of these */ + u32 function_caps; /* VFs must be aware of these */ }; struct mlx4_buf_list { -- cgit v1.2.3 From e95c17e9caff4b106cc95b761f3e07964a5a0d9f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 16 Oct 2012 17:33:44 -0500 Subject: drivers/video: fsl-diu-fb: add support for set_gamma ioctls The MPC5121 BSP comes with a gamma_set utility that initializes the gamma table via an ioctl. Unfortunately, the ioctl number that utility uses is defined improperly, but we can still support it. Signed-off-by: Timur Tabi --- drivers/video/fsl-diu-fb.c | 17 +++++++++++++++++ include/linux/fsl-diu-fb.h | 9 +++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index 5b12e1783fac..9c4054760627 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -1181,6 +1181,23 @@ static int fsl_diu_ioctl(struct fb_info *info, unsigned int cmd, ad->ckmin_b = ck.blue_min; } break; +#ifdef CONFIG_PPC_MPC512x + case MFB_SET_GAMMA: { + struct fsl_diu_data *data = mfbi->parent; + + if (copy_from_user(data->gamma, buf, sizeof(data->gamma))) + return -EFAULT; + setbits32(&data->diu_reg->gamma, 0); /* Force table reload */ + break; + } + case MFB_GET_GAMMA: { + struct fsl_diu_data *data = mfbi->parent; + + if (copy_to_user(buf, data->gamma, sizeof(data->gamma))) + return -EFAULT; + break; + } +#endif default: dev_err(info->dev, "unknown ioctl command (0x%08X)\n", cmd); return -ENOIOCTLCMD; diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h index 11c16a1fb9e3..a1e8277120c7 100644 --- a/include/linux/fsl-diu-fb.h +++ b/include/linux/fsl-diu-fb.h @@ -46,6 +46,15 @@ struct aoi_display_offset { #define MFB_SET_PIXFMT _IOW('M', 8, __u32) #define MFB_GET_PIXFMT _IOR('M', 8, __u32) +/* + * The MPC5121 BSP comes with a gamma_set utility that initializes the + * gamma table. Unfortunately, it uses bad values for the IOCTL commands, + * but there's nothing we can do about it now. These ioctls are only + * supported on the MPC5121. + */ +#define MFB_SET_GAMMA _IOW('M', 1, __u8) +#define MFB_GET_GAMMA _IOR('M', 1, __u8) + /* * The original definitions of MFB_SET_PIXFMT and MFB_GET_PIXFMT used the * wrong value for 'size' field of the ioctl. The current macros above use the -- cgit v1.2.3 From a8df7b1ab70bfd6f261fa5e96985fca638299acc Mon Sep 17 00:00:00 2001 From: Fabio Baltieri Date: Sun, 4 Nov 2012 01:54:34 -0800 Subject: leds: add led_trigger_rename function Implements a "led_trigger_rename" function to rename a trigger with proper locking. This assumes that led name was originally allocated in non-constant storage. Signed-off-by: Fabio Baltieri Cc: Kurt Van Dijck Signed-off-by: Bryan Wu --- drivers/leds/led-triggers.c | 13 +++++++++++++ include/linux/leds.h | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 262eb4193710..a4fa4bf02d53 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -166,6 +166,19 @@ void led_trigger_set_default(struct led_classdev *led_cdev) } EXPORT_SYMBOL_GPL(led_trigger_set_default); +void led_trigger_rename_static(const char *name, struct led_trigger *trig) +{ + /* new name must be on a temporary string to prevent races */ + BUG_ON(name == trig->name); + + down_write(&triggers_list_lock); + /* this assumes that trig->name was originaly allocated to + * non constant storage */ + strcpy((char *)trig->name, name); + up_write(&triggers_list_lock); +} +EXPORT_SYMBOL_GPL(led_trigger_rename_static); + /* LED Trigger Interface */ int led_trigger_register(struct led_trigger *trig) diff --git a/include/linux/leds.h b/include/linux/leds.h index 6e53bb31c220..8107592cfc41 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -139,6 +139,24 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_trigger_rename_static - rename a trigger + * @name: the new trigger name + * @trig: the LED trigger to rename + * + * Change a LED trigger name by copying the string passed in + * name into current trigger name, which MUST be large + * enough for the new string. + * + * Note that name must NOT point to the same string used + * during LED registration, as that could lead to races. + * + * This is meant to be used on triggers with statically + * allocated name. + */ +extern void led_trigger_rename_static(const char *name, + struct led_trigger *trig); + /* * LED Triggers */ -- cgit v1.2.3 From 057407c732b7761cf417a1e6633a02d9d473340d Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Sun, 18 Nov 2012 21:35:55 -0800 Subject: led: Add dependency on CONFIG_LEDS_TRIGGERS to led_trigger_rename_static() This patch fixes build warnings when CONFIG_LEDS_TRIGGERS is disabled as below: include/linux/leds.h:158:18: warning: 'struct led_trigger' declared inside parameter list [enabled by default] include/linux/leds.h:158:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Jingoo Han Acked-by: Fabio Baltieri Signed-off-by: Bryan Wu --- include/linux/leds.h | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 8107592cfc41..0d9b5eed714e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -139,24 +139,6 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); -/** - * led_trigger_rename_static - rename a trigger - * @name: the new trigger name - * @trig: the LED trigger to rename - * - * Change a LED trigger name by copying the string passed in - * name into current trigger name, which MUST be large - * enough for the new string. - * - * Note that name must NOT point to the same string used - * during LED registration, as that could lead to races. - * - * This is meant to be used on triggers with statically - * allocated name. - */ -extern void led_trigger_rename_static(const char *name, - struct led_trigger *trig); - /* * LED Triggers */ @@ -197,6 +179,23 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger, unsigned long *delay_on, unsigned long *delay_off, int invert); +/** + * led_trigger_rename_static - rename a trigger + * @name: the new trigger name + * @trig: the LED trigger to rename + * + * Change a LED trigger name by copying the string passed in + * name into current trigger name, which MUST be large + * enough for the new string. + * + * Note that name must NOT point to the same string used + * during LED registration, as that could lead to races. + * + * This is meant to be used on triggers with statically + * allocated name. + */ +extern void led_trigger_rename_static(const char *name, + struct led_trigger *trig); #else -- cgit v1.2.3 From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:25:01 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session pointer Move the session pointer into the slot table, then have struct nfs4_slot point to that slot table. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4proc.c | 33 +++++++++++++++++++++++---------- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 8 +++++--- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 2 +- 6 files changed, 33 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 36880b9aa91e..42c58691fb41 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,7 +258,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); -extern struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags); +extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, + u32 max_slots, gfp_t gfp_flags); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14b39742b6e4..5b61c4a83191 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -467,25 +467,28 @@ void nfs4_check_drain_bc_complete(struct nfs4_session *ses) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { + struct nfs4_session *session; struct nfs4_slot_table *tbl; - tbl = &res->sr_session->fc_slot_table; if (!res->sr_slot) { /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); return; } + tbl = res->sr_slot->table; + session = tbl->session; spin_lock(&tbl->slot_tbl_lock); nfs4_free_slot(tbl, res->sr_slot - tbl->slots); - nfs4_check_drain_fc_complete(res->sr_session); + nfs4_check_drain_fc_complete(session); spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { + struct nfs4_session *session; struct nfs4_slot *slot; unsigned long timestamp; struct nfs_client *clp; @@ -504,6 +507,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * goto out; slot = res->sr_slot; + session = slot->table->session; /* Check the SEQUENCE operation status */ switch (res->sr_status) { @@ -511,7 +515,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * /* Update the slot's sequence and clientid lease timer */ ++slot->seq_nr; timestamp = slot->renewal_time; - clp = res->sr_session->clp; + clp = session->clp; do_renew_lease(clp, timestamp); /* Check sequence flags */ if (res->sr_status_flags != 0) @@ -524,7 +528,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * */ dprintk("%s: slot=%td seq=%d: Operation in progress\n", __func__, - slot - res->sr_session->fc_slot_table.slots, + slot - session->fc_slot_table.slots, slot->seq_nr); goto out_retry; default: @@ -546,7 +550,7 @@ out_retry: static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { - if (res->sr_session == NULL) + if (res->sr_slot == NULL) return 1; return nfs41_sequence_done(task, res); } @@ -591,7 +595,6 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args, args->sa_cache_this = 0; if (cache_reply) args->sa_cache_this = 1; - res->sr_session = NULL; res->sr_slot = NULL; } @@ -646,7 +649,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); - res->sr_session = session; res->sr_slot = slot; res->sr_status_flags = 0; /* @@ -5659,9 +5661,18 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) +struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, + u32 max_slots, gfp_t gfp_flags) { - return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags); + struct nfs4_slot *tbl; + u32 i; + + tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); + if (tbl != NULL) { + for (i = 0; i < max_slots; i++) + tbl[i].table = table; + } + return tbl; } static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, @@ -5699,7 +5710,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, /* Does the newly negotiated max_reqs match the existing slot table? */ if (max_reqs != tbl->max_slots) { - new = nfs4_alloc_slots(max_reqs, GFP_NOFS); + new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS); if (!new) goto out; } @@ -5738,11 +5749,13 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) dprintk("--> %s\n", __func__); /* Fore channel */ tbl = &ses->fc_slot_table; + tbl->session = ses; status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); if (status) /* -ENOMEM */ return status; /* Back channel */ tbl = &ses->bc_slot_table; + tbl->session = ses; status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 96fcbb97fd6a..9495789c425b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2033,7 +2033,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; nfs4_begin_drain_session(clp); fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl->target_max_slots, GFP_NOFS); + new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS); if (!new) return -ENOMEM; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 672d9b0ef2c5..4126f054610a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5507,12 +5507,13 @@ static int decode_sequence(struct xdr_stream *xdr, struct rpc_rqst *rqstp) { #if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session; struct nfs4_sessionid id; u32 dummy; int status; __be32 *p; - if (!res->sr_session) + if (res->sr_slot == NULL) return 0; status = decode_op_hdr(xdr, OP_SEQUENCE); @@ -5526,8 +5527,9 @@ static int decode_sequence(struct xdr_stream *xdr, * sequence number, the server is looney tunes. */ status = -EREMOTEIO; + session = res->sr_slot->table->session; - if (memcmp(id.data, res->sr_session->sess_id.data, + if (memcmp(id.data, session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out_err; @@ -5545,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr, } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { + if (dummy != res->sr_slot - session->fc_slot_table.slots) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 97c8f9191880..b0412873d29c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -209,6 +209,7 @@ struct nfs_server { /* Sessions */ #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) struct nfs4_slot_table { + struct nfs4_session *session; /* Parent session */ struct nfs4_slot *slots; /* seqid per slot */ unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ spinlock_t slot_tbl_lock; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cb1c63a70c2..0fd88ab0e814 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -187,6 +187,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { + struct nfs4_slot_table *table; unsigned long renewal_time; u32 seq_nr; }; @@ -198,7 +199,6 @@ struct nfs4_sequence_args { }; struct nfs4_sequence_res { - struct nfs4_session *sr_session; struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; -- cgit v1.2.3 From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:45:06 -0500 Subject: NFSv4.1: Label each entry in the session slot tables with its slot number Instead of doing slot table pointer gymnastics every time we want to know which slot we're using. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++----- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b61c4a83191..4311dba49c58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -526,9 +526,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * returned NFS4ERR_DELAY as per Section 2.10.6.2 * of RFC5661. */ - dprintk("%s: slot=%td seq=%d: Operation in progress\n", + dprintk("%s: slot=%u seq=%u: Operation in progress\n", __func__, - slot - session->fc_slot_table.slots, + slot->slot_nr, slot->seq_nr); goto out_retry; default: @@ -671,9 +671,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, if (session == NULL) goto out; - dprintk("--> %s clp %p session %p sr_slot %td\n", + dprintk("--> %s clp %p session %p sr_slot %d\n", __func__, session->clp, session, res->sr_slot ? - res->sr_slot - session->fc_slot_table.slots : -1); + res->sr_slot->slot_nr : -1); ret = nfs41_setup_sequence(session, args, res, task); out: @@ -5669,8 +5669,10 @@ struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); if (tbl != NULL) { - for (i = 0; i < max_slots; i++) + for (i = 0; i < max_slots; i++) { tbl[i].table = table; + tbl[i].slot_nr = i; + } } return tbl; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4126f054610a..50bac7066160 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5547,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr, } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slot - session->fc_slot_table.slots) { + if (dummy != res->sr_slot->slot_nr) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0fd88ab0e814..9c9b76c94b46 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -189,6 +189,7 @@ struct nfs4_channel_attrs { struct nfs4_slot { struct nfs4_slot_table *table; unsigned long renewal_time; + u32 slot_nr; u32 seq_nr; }; -- cgit v1.2.3 From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:58:36 -0500 Subject: NFSv4.1: Simplify struct nfs4_sequence_args too Replace the session pointer + slotid with a pointer to the allocated slot. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4xdr.c | 21 ++++++++++----------- include/linux/nfs_xdr.h | 3 +-- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4311dba49c58..6c41a34e34b4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -591,7 +591,7 @@ out: static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { - args->sa_session = NULL; + args->sa_slot = NULL; args->sa_cache_this = 0; if (cache_reply) args->sa_cache_this = 1; @@ -644,8 +644,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); slot = tbl->slots + slotid; slot->renewal_time = jiffies; - args->sa_session = session; - args->sa_slotid = slotid; + + args->sa_slot = slot; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 50bac7066160..27b0fec1a6b0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1833,18 +1833,16 @@ static void encode_sequence(struct xdr_stream *xdr, struct compound_hdr *hdr) { #if defined(CONFIG_NFS_V4_1) - struct nfs4_session *session = args->sa_session; + struct nfs4_session *session; struct nfs4_slot_table *tp; - struct nfs4_slot *slot; + struct nfs4_slot *slot = args->sa_slot; __be32 *p; - if (!session) + if (slot == NULL) return; - tp = &session->fc_slot_table; - - WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); - slot = tp->slots + args->sa_slotid; + tp = slot->table; + session = tp->session; encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); @@ -1858,12 +1856,12 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[1], ((u32 *)session->sess_id.data)[2], ((u32 *)session->sess_id.data)[3], - slot->seq_nr, args->sa_slotid, + slot->seq_nr, slot->slot_nr, tp->highest_used_slotid, args->sa_cache_this); p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); - *p++ = cpu_to_be32(args->sa_slotid); + *p++ = cpu_to_be32(slot->slot_nr); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); #endif /* CONFIG_NFS_V4_1 */ @@ -2025,8 +2023,9 @@ static void encode_free_stateid(struct xdr_stream *xdr, static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) { #if defined(CONFIG_NFS_V4_1) - if (args->sa_session) - return args->sa_session->clp->cl_mvops->minor_version; + + if (args->sa_slot) + return args->sa_slot->table->session->clp->cl_mvops->minor_version; #endif /* CONFIG_NFS_V4_1 */ return 0; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9c9b76c94b46..deb31bbbb857 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -194,8 +194,7 @@ struct nfs4_slot { }; struct nfs4_sequence_args { - struct nfs4_session *sa_session; - u32 sa_slotid; + struct nfs4_slot *sa_slot; u8 sa_cache_this; }; -- cgit v1.2.3 From 31fbcda71489d8cbe2b82819eaab4818524e3a49 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 28 Sep 2012 10:29:07 +0100 Subject: Input: bu21013_ts - move GPIO init and exit functions into the driver These GPIO init and exit functions have no place in platform data, they should be part of the driver instead, Acked-by: Arnd Bergmann Acked-by: Linus Walleij Signed-off-by: Lee Jones Signed-off-by: Dmitry Torokhov --- arch/arm/mach-ux500/board-mop500-stuib.c | 71 +------------------------------- drivers/input/touchscreen/bu21013_ts.c | 69 ++++++++++++++++++++++--------- include/linux/input/bu21013.h | 10 +---- 3 files changed, 53 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-ux500/board-mop500-stuib.c b/arch/arm/mach-ux500/board-mop500-stuib.c index 564f57d5d8a7..7e1f294f0434 100644 --- a/arch/arm/mach-ux500/board-mop500-stuib.c +++ b/arch/arm/mach-ux500/board-mop500-stuib.c @@ -77,9 +77,6 @@ static struct i2c_board_info __initdata mop500_i2c0_devices_stuib[] = { * BU21013 ROHM touchscreen interface on the STUIBs */ -/* tracks number of bu21013 devices being enabled */ -static int bu21013_devices; - #define TOUCH_GPIO_PIN 84 #define TOUCH_XMAX 384 @@ -88,73 +85,8 @@ static int bu21013_devices; #define PRCMU_CLOCK_OCR 0x1CC #define TSC_EXT_CLOCK_9_6MHZ 0x840000 -/** - * bu21013_gpio_board_init : configures the touch panel. - * @reset_pin: reset pin number - * This function can be used to configures - * the voltage and reset the touch panel controller. - */ -static int bu21013_gpio_board_init(int reset_pin) -{ - int retval = 0; - - bu21013_devices++; - if (bu21013_devices == 1) { - retval = gpio_request(reset_pin, "touchp_reset"); - if (retval) { - printk(KERN_ERR "Unable to request gpio reset_pin"); - return retval; - } - retval = gpio_direction_output(reset_pin, 1); - if (retval < 0) { - printk(KERN_ERR "%s: gpio direction failed\n", - __func__); - return retval; - } - } - - return retval; -} - -/** - * bu21013_gpio_board_exit : deconfigures the touch panel controller - * @reset_pin: reset pin number - * This function can be used to deconfigures the chip selection - * for touch panel controller. - */ -static int bu21013_gpio_board_exit(int reset_pin) -{ - int retval = 0; - - if (bu21013_devices == 1) { - retval = gpio_direction_output(reset_pin, 0); - if (retval < 0) { - printk(KERN_ERR "%s: gpio direction failed\n", - __func__); - return retval; - } - gpio_set_value(reset_pin, 0); - } - bu21013_devices--; - - return retval; -} - -/** - * bu21013_read_pin_val : get the interrupt pin value - * This function can be used to get the interrupt pin value for touch panel - * controller. - */ -static int bu21013_read_pin_val(void) -{ - return gpio_get_value(TOUCH_GPIO_PIN); -} - static struct bu21013_platform_device tsc_plat_device = { - .cs_en = bu21013_gpio_board_init, - .cs_dis = bu21013_gpio_board_exit, - .irq_read_val = bu21013_read_pin_val, - .irq = NOMADIK_GPIO_TO_IRQ(TOUCH_GPIO_PIN), + .touch_pin = TOUCH_GPIO_PIN, .touch_x_max = TOUCH_XMAX, .touch_y_max = TOUCH_YMAX, .ext_clk = false, @@ -171,7 +103,6 @@ static struct i2c_board_info __initdata u8500_i2c3_devices_stuib[] = { I2C_BOARD_INFO("bu21013_tp", 0x5D), .platform_data = &tsc_plat_device, }, - }; void __init mop500_stuib_init(void) diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c index 1e8cddd06c60..c6f6a04ec673 100644 --- a/drivers/input/touchscreen/bu21013_ts.c +++ b/drivers/input/touchscreen/bu21013_ts.c @@ -14,6 +14,7 @@ #include #include #include +#include #define PEN_DOWN_INTR 0 #define MAX_FINGERS 2 @@ -148,11 +149,12 @@ struct bu21013_ts_data { struct i2c_client *client; wait_queue_head_t wait; - bool touch_stopped; const struct bu21013_platform_device *chip; struct input_dev *in_dev; - unsigned int intr_pin; struct regulator *regulator; + unsigned int irq; + unsigned int intr_pin; + bool touch_stopped; }; /** @@ -262,7 +264,7 @@ static irqreturn_t bu21013_gpio_irq(int irq, void *device_data) return IRQ_NONE; } - data->intr_pin = data->chip->irq_read_val(); + data->intr_pin = gpio_get_value(data->chip->touch_pin); if (data->intr_pin == PEN_DOWN_INTR) wait_event_timeout(data->wait, data->touch_stopped, msecs_to_jiffies(2)); @@ -418,9 +420,32 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data) { bu21013_data->touch_stopped = true; wake_up(&bu21013_data->wait); - free_irq(bu21013_data->chip->irq, bu21013_data); + free_irq(bu21013_data->irq, bu21013_data); } +/** + * bu21013_cs_disable() - deconfigures the touch panel controller + * @bu21013_data: device structure pointer + * + * This function is used to deconfigure the chip selection + * for touch panel controller. + */ +static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data) +{ + int error; + + error = gpio_direction_output(bu21013_data->chip->cs_pin, 0); + if (error < 0) + dev_warn(&bu21013_data->client->dev, + "%s: gpio direction failed, error: %d\n", + __func__, error); + else + gpio_set_value(bu21013_data->chip->cs_pin, 0); + + gpio_free(bu21013_data->chip->cs_pin); +} + + /** * bu21013_probe() - initializes the i2c-client touchscreen driver * @client: i2c client structure pointer @@ -430,7 +455,7 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data) * driver and returns integer. */ static int bu21013_probe(struct i2c_client *client, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { struct bu21013_ts_data *bu21013_data; struct input_dev *in_dev; @@ -449,6 +474,11 @@ static int bu21013_probe(struct i2c_client *client, return -EINVAL; } + if (!gpio_is_valid(pdata->touch_pin)) { + dev_err(&client->dev, "invalid touch_pin supplied\n"); + return -EINVAL; + } + bu21013_data = kzalloc(sizeof(struct bu21013_ts_data), GFP_KERNEL); in_dev = input_allocate_device(); if (!bu21013_data || !in_dev) { @@ -460,6 +490,7 @@ static int bu21013_probe(struct i2c_client *client, bu21013_data->in_dev = in_dev; bu21013_data->chip = pdata; bu21013_data->client = client; + bu21013_data->irq = gpio_to_irq(pdata->touch_pin); bu21013_data->regulator = regulator_get(&client->dev, "avdd"); if (IS_ERR(bu21013_data->regulator)) { @@ -478,12 +509,11 @@ static int bu21013_probe(struct i2c_client *client, init_waitqueue_head(&bu21013_data->wait); /* configure the gpio pins */ - if (pdata->cs_en) { - error = pdata->cs_en(pdata->cs_pin); - if (error < 0) { - dev_err(&client->dev, "chip init failed\n"); - goto err_disable_regulator; - } + error = gpio_request_one(pdata->cs_pin, GPIOF_OUT_INIT_HIGH, + "touchp_reset"); + if (error < 0) { + dev_err(&client->dev, "Unable to request gpio reset_pin\n"); + goto err_disable_regulator; } /* configure the touch panel controller */ @@ -508,12 +538,13 @@ static int bu21013_probe(struct i2c_client *client, pdata->touch_y_max, 0, 0); input_set_drvdata(in_dev, bu21013_data); - error = request_threaded_irq(pdata->irq, NULL, bu21013_gpio_irq, + error = request_threaded_irq(bu21013_data->irq, NULL, bu21013_gpio_irq, IRQF_TRIGGER_FALLING | IRQF_SHARED | IRQF_ONESHOT, DRIVER_TP, bu21013_data); if (error) { - dev_err(&client->dev, "request irq %d failed\n", pdata->irq); + dev_err(&client->dev, "request irq %d failed\n", + bu21013_data->irq); goto err_cs_disable; } @@ -531,7 +562,7 @@ static int bu21013_probe(struct i2c_client *client, err_free_irq: bu21013_free_irq(bu21013_data); err_cs_disable: - pdata->cs_dis(pdata->cs_pin); + bu21013_cs_disable(bu21013_data); err_disable_regulator: regulator_disable(bu21013_data->regulator); err_put_regulator: @@ -555,7 +586,7 @@ static int bu21013_remove(struct i2c_client *client) bu21013_free_irq(bu21013_data); - bu21013_data->chip->cs_dis(bu21013_data->chip->cs_pin); + bu21013_cs_disable(bu21013_data); input_unregister_device(bu21013_data->in_dev); @@ -584,9 +615,9 @@ static int bu21013_suspend(struct device *dev) bu21013_data->touch_stopped = true; if (device_may_wakeup(&client->dev)) - enable_irq_wake(bu21013_data->chip->irq); + enable_irq_wake(bu21013_data->irq); else - disable_irq(bu21013_data->chip->irq); + disable_irq(bu21013_data->irq); regulator_disable(bu21013_data->regulator); @@ -621,9 +652,9 @@ static int bu21013_resume(struct device *dev) bu21013_data->touch_stopped = false; if (device_may_wakeup(&client->dev)) - disable_irq_wake(bu21013_data->chip->irq); + disable_irq_wake(bu21013_data->irq); else - enable_irq(bu21013_data->chip->irq); + enable_irq(bu21013_data->irq); return 0; } diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h index 05e03284b92a..6230d76bde5d 100644 --- a/include/linux/input/bu21013.h +++ b/include/linux/input/bu21013.h @@ -9,13 +9,10 @@ /** * struct bu21013_platform_device - Handle the platform data - * @cs_en: pointer to the cs enable function - * @cs_dis: pointer to the cs disable function - * @irq_read_val: pointer to read the pen irq value function * @touch_x_max: touch x max * @touch_y_max: touch y max * @cs_pin: chip select pin - * @irq: irq pin + * @touch_pin: touch gpio pin * @ext_clk: external clock flag * @x_flip: x flip flag * @y_flip: y flip flag @@ -24,13 +21,10 @@ * This is used to handle the platform data */ struct bu21013_platform_device { - int (*cs_en)(int reset_pin); - int (*cs_dis)(int reset_pin); - int (*irq_read_val)(void); int touch_x_max; int touch_y_max; unsigned int cs_pin; - unsigned int irq; + unsigned int touch_pin; bool ext_clk; bool x_flip; bool y_flip; -- cgit v1.2.3 From 972deb4f49b5b6703d9c6117ba0aeda2180d4447 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Mon, 26 Nov 2012 15:25:11 +0530 Subject: i2c: omap: Remove the OMAP_I2C_FLAG_RESET_REGS_POSTIDLE flag The OMAP_I2C_FLAG_RESET_REGS_POSTIDLE is not used anymore in the i2c driver. Remove the flag. Signed-off-by: Shubhrajyoti D Reviewed-by: Felipe Balbi Signed-off-by: Wolfram Sang --- arch/arm/mach-omap2/omap_hwmod_33xx_data.c | 3 +-- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------ arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 3 +-- drivers/i2c/busses/i2c-omap.c | 3 +-- include/linux/i2c-omap.h | 1 - 5 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c index 59d5c1cd316d..c9a186bc6d40 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c @@ -1103,8 +1103,7 @@ static struct omap_hwmod_class i2c_class = { }; static struct omap_i2c_dev_attr i2c_dev_attr = { - .flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE, }; /* i2c1 */ diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 943222c40489..36270bb637e4 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = { /* I2C1 */ static struct omap_i2c_dev_attr i2c1_dev_attr = { .fifo_depth = 8, /* bytes */ - .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | - OMAP_I2C_FLAG_BUS_SHIFT_2, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_2, }; static struct omap_hwmod omap3xxx_i2c1_hwmod = { @@ -817,8 +816,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = { /* I2C2 */ static struct omap_i2c_dev_attr i2c2_dev_attr = { .fifo_depth = 8, /* bytes */ - .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | - OMAP_I2C_FLAG_BUS_SHIFT_2, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_2, }; static struct omap_hwmod omap3xxx_i2c2_hwmod = { @@ -843,8 +841,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = { /* I2C3 */ static struct omap_i2c_dev_attr i2c3_dev_attr = { .fifo_depth = 64, /* bytes */ - .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | - OMAP_I2C_FLAG_BUS_SHIFT_2, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_2, }; static struct omap_hwmod_irq_info i2c3_mpu_irqs[] = { diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 652d0285bd6d..eb40dbc6688e 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -1526,8 +1526,7 @@ static struct omap_hwmod_class omap44xx_i2c_hwmod_class = { }; static struct omap_i2c_dev_attr i2c_dev_attr = { - .flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE | - OMAP_I2C_FLAG_RESET_REGS_POSTIDLE, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE, }; /* i2c1 */ diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 248280136668..7a62acb7d262 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1038,8 +1038,7 @@ static const struct i2c_algorithm omap_i2c_algo = { #ifdef CONFIG_OF static struct omap_i2c_bus_platform_data omap3_pdata = { .rev = OMAP_I2C_IP_VERSION_1, - .flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE | - OMAP_I2C_FLAG_BUS_SHIFT_2, + .flags = OMAP_I2C_FLAG_BUS_SHIFT_2, }; static struct omap_i2c_bus_platform_data omap4_pdata = { diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index 1b25c04f82d9..babe0cf6d56b 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -20,7 +20,6 @@ #define OMAP_I2C_FLAG_NO_FIFO BIT(0) #define OMAP_I2C_FLAG_SIMPLE_CLOCK BIT(1) #define OMAP_I2C_FLAG_16BIT_DATA_REG BIT(2) -#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE BIT(3) #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK BIT(5) #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK BIT(6) /* how the CPU address bus must be translated for I2C unit access */ -- cgit v1.2.3 From 4b9347dcbe3b426d19bda99b2061a2d1fe5a2dce Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 15 Oct 2012 16:03:51 +0200 Subject: common: DMA-mapping: add DMA_ATTR_FORCE_CONTIGUOUS attribute This patch adds DMA_ATTR_FORCE_CONTIGUOUS attribute to the DMA-mapping subsystem. By default DMA-mapping subsystem is allowed to assemble the buffer allocated by dma_alloc_attrs() function from individual pages if it can be mapped as contiguous chunk into device dma address space. By specifing this attribute the allocated buffer is forced to be contiguous also in physical memory. Signed-off-by: Marek Szyprowski --- Documentation/DMA-attributes.txt | 9 +++++++++ include/linux/dma-attrs.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index f50309081ac7..e59480db9ee0 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -91,3 +91,12 @@ transferred to 'device' domain. This attribute can be also used for dma_unmap_{single,page,sg} functions family to force buffer to stay in device domain after releasing a mapping for it. Use this attribute with care! + +DMA_ATTR_FORCE_CONTIGUOUS +------------------------- + +By default DMA-mapping subsystem is allowed to assemble the buffer +allocated by dma_alloc_attrs() function from individual pages if it can +be mapped as contiguous chunk into device dma address space. By +specifing this attribute the allocated buffer is forced to be contiguous +also in physical memory. diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h index f83f793223ff..c8e1831d7572 100644 --- a/include/linux/dma-attrs.h +++ b/include/linux/dma-attrs.h @@ -17,6 +17,7 @@ enum dma_attr { DMA_ATTR_NON_CONSISTENT, DMA_ATTR_NO_KERNEL_MAPPING, DMA_ATTR_SKIP_CPU_SYNC, + DMA_ATTR_FORCE_CONTIGUOUS, DMA_ATTR_MAX, }; -- cgit v1.2.3 From eed8c02e680c04cd737e0a9cef74e68d8eb0cefa Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 30 Nov 2012 11:42:40 +0100 Subject: wait: add wait_event_lock_irq() interface New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit moves the private wait_event_lock_irq() macro from MD to regular wait includes, introduces new macro wait_event_lock_irq_cmd() instead of using the old method with omitting cmd parameter which is ugly and makes a use of new macros in the MD. It also introduces the _interruptible_ variant. The use of new interface is when one have a special lock to protect data structures used in the condition, or one also needs to invoke "cmd" before putting it to sleep. All new macros are expected to be called with the lock taken. The lock is released before sleep and is reacquired afterwards. We will leave the macro with the lock held. Note to DM: IMO this should also fix theoretical race on waitqueue while using simultaneously wait_event_lock_irq() and wait_event() because of lack of locking around current state setting and wait queue removal. Signed-off-by: Lukas Czerner Cc: Neil Brown Cc: David Howells Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- drivers/md/md.c | 2 +- drivers/md/md.h | 26 -------- drivers/md/raid1.c | 15 +++-- drivers/md/raid10.c | 15 +++-- drivers/md/raid5.c | 12 ++-- include/linux/wait.h | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 184 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9ab768acfb62..7e513a38cec7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) spin_lock_irq(&mddev->write_lock); wait_event_lock_irq(mddev->sb_wait, !mddev->flush_bio, - mddev->write_lock, /*nothing*/); + mddev->write_lock); mddev->flush_bio = bio; spin_unlock_irq(&mddev->write_lock); diff --git a/drivers/md/md.h b/drivers/md/md.h index af443ab868db..1e2fc3d9c74c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -551,32 +551,6 @@ struct md_thread { #define THREAD_WAKEUP 0 -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - static inline void safe_put_page(struct page *p) { if (p) put_page(p); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8034fbd6190c..534dd74a2da0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf) /* Wait until no block IO is waiting */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, - conf->resync_lock, ); + conf->resync_lock); /* block any new IO from starting */ conf->barrier++; @@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); + conf->resync_lock); spin_unlock_irq(&conf->resync_lock); } @@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf) (conf->nr_pending && current->bio_list && !bio_list_empty(current->bio_list)), - conf->resync_lock, - ); + conf->resync_lock); conf->nr_waiting--; } conf->nr_pending++; @@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf) spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); + wait_event_lock_irq_cmd(conf->wait_barrier, + conf->nr_pending == conf->nr_queued+1, + conf->resync_lock, + flush_pending_writes(conf)); spin_unlock_irq(&conf->resync_lock); } static void unfreeze_array(struct r1conf *conf) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 906ccbd0f7dc..9a08f621b27d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force) /* Wait until no block IO is waiting (unless 'force') */ wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, - conf->resync_lock, ); + conf->resync_lock); /* block any new IO from starting */ conf->barrier++; @@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force) /* Now wait for all pending IO to complete */ wait_event_lock_irq(conf->wait_barrier, !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); + conf->resync_lock); spin_unlock_irq(&conf->resync_lock); } @@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf) (conf->nr_pending && current->bio_list && !bio_list_empty(current->bio_list)), - conf->resync_lock, - ); + conf->resync_lock); conf->nr_waiting--; } conf->nr_pending++; @@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf) spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); + wait_event_lock_irq_cmd(conf->wait_barrier, + conf->nr_pending == conf->nr_queued+1, + conf->resync_lock, + flush_pending_writes(conf)); spin_unlock_irq(&conf->resync_lock); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c5439dce0295..2bf617d6f4fd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, do { wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0 || noquiesce, - conf->device_lock, /* nothing */); + conf->device_lock); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { if (!conf->inactive_blocked) @@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes *3/4) || !conf->inactive_blocked), - conf->device_lock, - ); + conf->device_lock); conf->inactive_blocked = 0; } else init_stripe(sh, sector, previous); @@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, !list_empty(&conf->inactive_list), - conf->device_lock, - ); + conf->device_lock); osh = get_free_stripe(conf); spin_unlock_irq(&conf->device_lock); atomic_set(&nsh->count, 1); @@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0, - conf->device_lock, /* nothing */); + conf->device_lock); atomic_inc(&conf->active_aligned_reads); spin_unlock_irq(&conf->device_lock); @@ -6088,7 +6086,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) wait_event_lock_irq(conf->wait_for_stripe, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, - conf->device_lock, /* nothing */); + conf->device_lock); conf->quiesce = 1; spin_unlock_irq(&conf->device_lock); /* allow reshape to continue */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 168dfe122dd3..7cb64d4b499d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -550,6 +550,170 @@ do { \ __ret; \ }) + +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_lock_irq_cmd - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd + * and schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + */ +#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) + +/** + * wait_event_lock_irq - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + */ +#define wait_event_lock_irq(wq, condition, lock) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, ); \ +} while (0) + + +#define __wait_event_interruptible_lock_irq(wq, condition, \ + lock, ret, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. + * The condition is checked under the lock. This is expected to + * be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd and + * schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, cmd); \ + __ret; \ +}) + +/** + * wait_event_interruptible_lock_irq - sleep until a condition gets true. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq(wq, condition, lock) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, ); \ + __ret; \ +}) + + /* * These are the old interfaces to sleep waiting for an event. * They are racy. DO NOT use them, use the wait_event* interfaces above. -- cgit v1.2.3 From 7b5a35225b0d4fd779cf79d7624e63d1957f6c4d Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 30 Nov 2012 11:42:41 +0100 Subject: loop: Limit the number of requests in the bio list Currently there is not limitation of number of requests in the loop bio list. This can lead into some nasty situations when the caller spawns tons of bio requests taking huge amount of memory. This is even more obvious with discard where blkdev_issue_discard() will submit all bios for the range and wait for them to finish afterwards. On really big loop devices and slow backing file system this can lead to OOM situation as reported by Dave Chinner. With this patch we will wait in loop_make_request() if the number of bios in the loop bio list would exceed 'nr_congestion_on'. We'll wake up the process as we process the bios form the list. Some threshold hysteresis is in place to avoid high frequency oscillation. Signed-off-by: Lukas Czerner Reported-by: Dave Chinner Signed-off-by: Jens Axboe --- drivers/block/loop.c | 10 ++++++++++ include/linux/loop.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e9d594fd12cb..800aec7927de 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -463,6 +463,7 @@ out: */ static void loop_add_bio(struct loop_device *lo, struct bio *bio) { + lo->lo_bio_count++; bio_list_add(&lo->lo_bio_list, bio); } @@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio) */ static struct bio *loop_get_bio(struct loop_device *lo) { + lo->lo_bio_count--; return bio_list_pop(&lo->lo_bio_list); } @@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) goto out; if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) goto out; + if (lo->lo_bio_count >= q->nr_congestion_on) + wait_event_lock_irq(lo->lo_req_wait, + lo->lo_bio_count < q->nr_congestion_off, + lo->lo_lock); loop_add_bio(lo, old_bio); wake_up(&lo->lo_event); spin_unlock_irq(&lo->lo_lock); @@ -546,6 +552,8 @@ static int loop_thread(void *data) continue; spin_lock_irq(&lo->lo_lock); bio = loop_get_bio(lo); + if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off) + wake_up(&lo->lo_req_wait); spin_unlock_irq(&lo->lo_lock); BUG_ON(!bio); @@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->transfer = transfer_none; lo->ioctl = NULL; lo->lo_sizelimit = 0; + lo->lo_bio_count = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); @@ -1660,6 +1669,7 @@ static int loop_add(struct loop_device **l, int i) lo->lo_number = i; lo->lo_thread = NULL; init_waitqueue_head(&lo->lo_event); + init_waitqueue_head(&lo->lo_req_wait); spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; disk->first_minor = i << part_shift; diff --git a/include/linux/loop.h b/include/linux/loop.h index 6492181bcb1d..460b60fa7adf 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -53,10 +53,13 @@ struct loop_device { spinlock_t lo_lock; struct bio_list lo_bio_list; + unsigned int lo_bio_count; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; wait_queue_head_t lo_event; + /* wait queue for incoming requests */ + wait_queue_head_t lo_req_wait; struct request_queue *lo_queue; struct gendisk *lo_disk; -- cgit v1.2.3 From 12a5105e04143569b3e9e5ef03cf9cad8862473a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Nov 2012 00:17:15 +0530 Subject: mfd: stmpe: Get rid of irq_invert_polarity Since the very first patch, stmpe core driver is using irq_invert_polarity as part of platform data. But, nobody is actually using it in kernel till now. Also, this is not something part of hardware specs, but is included to cater some board mistakes or quirks. So, better get rid of it. This is earlier discussed here: https://lkml.org/lkml/2012/11/27/636 Signed-off-by: Viresh Kumar Acked-by: Lee Jones Signed-off-by: Samuel Ortiz --- drivers/mfd/stmpe.c | 7 ------- include/linux/mfd/stmpe.h | 2 -- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index f9f7de796029..90c6151bc52e 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -953,13 +953,6 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe) else icr |= STMPE_ICR_LSB_HIGH; } - - if (stmpe->pdata->irq_invert_polarity) { - if (id == STMPE801_ID) - icr ^= STMPE801_REG_SYS_CTRL_INT_HI; - else - icr ^= STMPE_ICR_LSB_HIGH; - } } if (stmpe->pdata->autosleep) { diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 15dac790365f..383ac1512a39 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -190,7 +190,6 @@ struct stmpe_ts_platform_data { * @id: device id to distinguish between multiple STMPEs on the same board * @blocks: bitmask of blocks to enable (use STMPE_BLOCK_*) * @irq_trigger: IRQ trigger to use for the interrupt to the host - * @irq_invert_polarity: IRQ line is connected with reversed polarity * @autosleep: bool to enable/disable stmpe autosleep * @autosleep_timeout: inactivity timeout in milliseconds for autosleep * @irq_base: base IRQ number. %STMPE_NR_IRQS irqs will be used, or @@ -207,7 +206,6 @@ struct stmpe_platform_data { unsigned int blocks; int irq_base; unsigned int irq_trigger; - bool irq_invert_polarity; bool autosleep; bool irq_over_gpio; int irq_gpio; -- cgit v1.2.3 From 0e5fca8106199f5c680bb93e75c16381c4c256ce Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 29 Nov 2012 08:48:26 +0000 Subject: mfd: tps65910: Remove unused data The 'io_mutex' is not used anywhere. The regmap API supports the mutex internally, so no additional mutex is required. And 'domain' private data is unnecessary because the irq domain is already registered by using regmap_add_irq_chip(). Signed-off-by: Milo(Woogyom) Kim Acked-by: Laxman Dewangan Signed-off-by: Samuel Ortiz --- drivers/mfd/tps65910.c | 1 - include/linux/mfd/tps65910.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index ca3783350ccf..c160c2d76f79 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -486,7 +486,6 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c, tps65910->dev = &i2c->dev; tps65910->i2c_client = i2c; tps65910->id = chip_id; - mutex_init(&tps65910->io_mutex); tps65910->regmap = devm_regmap_init_i2c(i2c, &tps65910_regmap_config); if (IS_ERR(tps65910->regmap)) { diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 0b16903f7e88..20e433e551e3 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -893,7 +893,6 @@ struct tps65910 { struct device *dev; struct i2c_client *i2c_client; struct regmap *regmap; - struct mutex io_mutex; unsigned int id; /* Client devices */ @@ -907,7 +906,6 @@ struct tps65910 { /* IRQ Handling */ int chip_irq; struct regmap_irq_chip_data *irq_data; - struct irq_domain *domain; }; struct tps65910_platform_data { -- cgit v1.2.3 From 78ad724ade23c66650dcaba04f4d2307c2884c69 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 30 Nov 2012 07:58:10 +0530 Subject: [SCSI] miscdevice: Adding support for MPT3SAS_MINOR(222) Signed-off-by: Sreekanth Reddy Reviewed-by: Nagalakshmi Nandigama Signed-off-by: James Bottomley --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index e0deeb2cc939..09c2300ddb37 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -34,6 +34,7 @@ #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 #define MPT2SAS_MINOR 221 +#define MPT3SAS_MINOR 222 #define UINPUT_MINOR 223 #define MISC_MCELOG_MINOR 227 #define HPET_MINOR 228 -- cgit v1.2.3 From 0d0cdb028f9d9771e2b346038707734121f906e3 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Mon, 26 Nov 2012 13:55:25 +0800 Subject: libata: restore acpi disable functionality Commit 66fa7f215 "libata-acpi: improve ACPI disabling" introdcued the behaviour of disabling ATA ACPI if ata_acpi_on_devcfg failed the 2nd time, but commit 30dcf76ac dropped this behaviour and this caused problem for Dimitris Damigos, where his laptop can not resume correctly. The bugzilla page for it is: https://bugzilla.kernel.org/show_bug.cgi?id=49331 The problem is, ata_dev_push_id will fail the 2nd time it is invoked, and due to disabling ACPI code is dropped, ata_acpi_on_devcfg which calls ata_dev_push_id will keep failing and eventually made the device disabled. This patch restores the original behaviour, if acpi failed the 2nd time, disable acpi functionality for the device(and we do not event need to add a debug message for this as it is still there ;-). Reported-by: Dimitris Damigos Signed-off-by: Aaron Lu Cc: Signed-off-by: Jeff Garzik --- drivers/ata/libata-acpi.c | 4 ++++ include/linux/libata.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 5b0ba3f20edc..ef01ac07502e 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -76,6 +76,9 @@ acpi_handle ata_dev_acpi_handle(struct ata_device *dev) acpi_integer adr; struct ata_port *ap = dev->link->ap; + if (dev->flags & ATA_DFLAG_ACPI_DISABLED) + return NULL; + if (ap->flags & ATA_FLAG_ACPI_SATA) { if (!sata_pmp_attached(ap)) adr = SATA_ADR(ap->port_no, NO_PORT_MULT); @@ -945,6 +948,7 @@ int ata_acpi_on_devcfg(struct ata_device *dev) return rc; } + dev->flags |= ATA_DFLAG_ACPI_DISABLED; ata_dev_warn(dev, "ACPI: failed the second time, disabled\n"); /* We can safely continue if no _GTF command has been executed diff --git a/include/linux/libata.h b/include/linux/libata.h index 77eeeda2b6e2..e931c9ad1078 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -163,6 +163,7 @@ enum { ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ + ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ -- cgit v1.2.3 From de90cd71f68e947d3bd6c3f2ef5731ead010a768 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Sun, 18 Nov 2012 04:44:41 +0800 Subject: libata: check SATA_SETTINGS log with HW Feature Ctrl NCQ capability was used to check availability of SATA Settings page from Identify Device Data Log, which contains DevSlp timing variables. It does not work on some HDDs and leads to error messages. IDENTIFY word 78 bit 5(Hardware Feature Control) should be used. Quoting SATA spec 3.1: If Hardware Feature Control is supported, then: a) IDENTIFY DEVICE data word 78 bit 5 (see 13.2.1.18) shall be set to one; b) the SET FEATURES Select Hardware Feature Control subcommand shall be supported (see 13.3.8); c) page 08h of the Identify Device Data log (see 13.7.7) shall be supported; This patch is not tested on SATA HDD with DevSlp supported. Reported-by: Borislav Petkov Signed-off-by: Shane Huang Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 3 +-- include/linux/ata.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f46fbd3bd3fb..caffe73c1e4a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2330,9 +2330,8 @@ int ata_dev_configure(struct ata_device *dev) /* Obtain SATA Settings page from Identify Device Data Log, * which contains DevSlp timing variables etc. - * Exclude old devices with ata_id_has_ncq() */ - if (ata_id_has_ncq(dev->id)) { + if (ata_id_has_hw_feature_ctrl(dev->id)) { err_mask = ata_read_log_page(dev, ATA_LOG_SATA_ID_DEV_DATA, ATA_LOG_SATA_SETTINGS, diff --git a/include/linux/ata.h b/include/linux/ata.h index 408da9502177..18cbb93fdbca 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -593,6 +593,7 @@ static inline int ata_is_data(u8 prot) #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) +#define ata_id_has_hw_feature_ctrl(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 5)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3 From b7db04d9264fca4b00e949da7b3180c50e243fca Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 2 Nov 2012 12:29:32 -0700 Subject: libata: implement ata_platform_remove_one() This relatively simple boiler-plate code is repeated in several platform drivers. We should implement a common version in libata. Signed-off-by: Brian Norris Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 23 +++++++++++++++++++++++ include/linux/libata.h | 4 ++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 583f26d6d455..8e3f4a90c088 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -67,6 +67,7 @@ #include #include #include +#include #include "libata.h" #include "libata-transport.h" @@ -6382,6 +6383,26 @@ int ata_pci_device_resume(struct pci_dev *pdev) #endif /* CONFIG_PCI */ +/** + * ata_platform_remove_one - Platform layer callback for device removal + * @pdev: Platform device that was removed + * + * Platform layer indicates to libata via this hook that hot-unplug or + * module unload event has occurred. Detach all ports. Resource + * release is handled via devres. + * + * LOCKING: + * Inherited from platform layer (may sleep). + */ +int ata_platform_remove_one(struct platform_device *pdev) +{ + struct ata_host *host = platform_get_drvdata(pdev); + + ata_host_detach(host); + + return 0; +} + static int __init ata_parse_force_one(char **cur, struct ata_force_ent *force_ent, const char **reason) @@ -6877,6 +6898,8 @@ EXPORT_SYMBOL_GPL(ata_pci_device_resume); #endif /* CONFIG_PM */ #endif /* CONFIG_PCI */ +EXPORT_SYMBOL_GPL(ata_platform_remove_one); + EXPORT_SYMBOL_GPL(__ata_ehi_push_desc); EXPORT_SYMBOL_GPL(ata_ehi_push_desc); EXPORT_SYMBOL_GPL(ata_ehi_clear_desc); diff --git a/include/linux/libata.h b/include/linux/libata.h index e931c9ad1078..83ba0ab2c915 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1115,6 +1115,10 @@ extern int ata_pci_device_resume(struct pci_dev *pdev); #endif /* CONFIG_PM */ #endif /* CONFIG_PCI */ +struct platform_device; + +extern int ata_platform_remove_one(struct platform_device *pdev); + /* * ACPI - drivers/ata/libata-acpi.c */ -- cgit v1.2.3 From 156f34d26a74c6ea060fb43d898f17509ed8e18a Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 2 Nov 2012 00:46:24 -0700 Subject: pata_platform: remove unused remove function All users of __pata_platform_remove() have been converted to utilize the common ata_platform_remove_one(). Signed-off-by: Brian Norris Signed-off-by: Jeff Garzik --- drivers/ata/pata_platform.c | 17 ----------------- include/linux/ata_platform.h | 2 -- 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 449aa29931bc..f4372d0c7ce6 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -178,23 +178,6 @@ int __devinit __pata_platform_probe(struct device *dev, } EXPORT_SYMBOL_GPL(__pata_platform_probe); -/** - * __pata_platform_remove - unplug a platform interface - * @dev: device - * - * A platform bus ATA device has been unplugged. Perform the needed - * cleanup. Also called on module unload for any active devices. - */ -int __pata_platform_remove(struct device *dev) -{ - struct ata_host *host = dev_get_drvdata(dev); - - ata_host_detach(host); - - return 0; -} -EXPORT_SYMBOL_GPL(__pata_platform_remove); - static int __devinit pata_platform_probe(struct platform_device *pdev) { struct resource *io_res; diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index b856a2a590d9..fe9989636b62 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -22,8 +22,6 @@ extern int __devinit __pata_platform_probe(struct device *dev, unsigned int ioport_shift, int __pio_mask); -extern int __devexit __pata_platform_remove(struct device *dev); - /* * Marvell SATA private data */ -- cgit v1.2.3 From 64b37b2a63eb2f80b65c7185f0013f8ffc637ae3 Mon Sep 17 00:00:00 2001 From: Matthieu CASTET Date: Tue, 6 Nov 2012 11:51:44 +0100 Subject: mtd: nand: add NAND_BUSWIDTH_AUTO to autodetect bus width The driver call nand_scan_ident in 8 bit mode, then readid or onfi detection are done (and detect bus width). The driver should update its bus width before calling nand_scan_tail. This work because readid and onfi are read work 8 byte mode. Note that nand_scan_ident send command (NAND_CMD_RESET, NAND_CMD_READID, NAND_CMD_PARAM), address and read data The ONFI specificication is not very clear for x16 device if high byte of address should be driven to 0, but according to [1] it should be ok to not drive it during autodetection. [1] 3.3.2. Target Initialization [...] The Read ID and Read Parameter Page commands only use the lower 8-bits of the data bus. The host shall not issue commands that use a word data width on x16 devices until the host determines the device supports a 16-bit data bus width in the parameter page. Signed-off-by: Matthieu CASTET Signed-off-by: Artem Bityutskiy --- drivers/mtd/nand/nand_base.c | 14 +++++++++----- include/linux/mtd/nand.h | 7 +++++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 6f58e1633e2f..5851c51ac2df 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3250,11 +3250,15 @@ ident_done: break; } - /* - * Check, if buswidth is correct. Hardware drivers should set - * chip correct! - */ - if (busw != (chip->options & NAND_BUSWIDTH_16)) { + if (chip->options & NAND_BUSWIDTH_AUTO) { + WARN_ON(chip->options & NAND_BUSWIDTH_16); + chip->options |= busw; + nand_set_defaults(chip, busw); + } else if (busw != (chip->options & NAND_BUSWIDTH_16)) { + /* + * Check, if buswidth is correct. Hardware drivers should set + * chip correct! + */ pr_info("NAND device: Manufacturer ID:" " 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id, *dev_id, nand_manuf_ids[maf_idx].name, mtd->name); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9d8a6048aacd..7ccb3c59ed60 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -219,6 +219,13 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 +/* + * Autodetect nand buswidth with readid/onfi. + * This suppose the driver will configure the hardware in 8 bits mode + * when calling nand_scan_ident, and update its configuration + * before calling nand_scan_tail. + */ +#define NAND_BUSWIDTH_AUTO 0x00080000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ -- cgit v1.2.3 From 72b15b6ae97796c5fac687addde5dbfab872cf94 Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Mon, 19 Nov 2012 19:05:50 -0600 Subject: iommu/omap: Migrate to hwmod framework Use hwmod data and device attributes to build and register an omap device for iommu driver. - Update the naming convention in isp module. - Remove unneeded check for number of resources, as this is now handled by omap_device and prevents driver from loading. - Now unused, remove platform device and resource data, handling of sysconfig register for softreset purposes, use default latency structure. - Use hwmod API for reset handling. Signed-off-by: Omar Ramirez Luna Tested-by: Ohad Ben-Cohen Signed-off-by: Joerg Roedel --- arch/arm/mach-omap2/devices.c | 2 +- arch/arm/mach-omap2/omap-iommu.c | 168 +++++++------------------------ drivers/iommu/omap-iommu.c | 23 ++++- drivers/iommu/omap-iommu2.c | 19 ---- include/linux/platform_data/iommu-omap.h | 8 +- 5 files changed, 64 insertions(+), 156 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index c15f5a97b51c..787a996ec4eb 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -214,7 +214,7 @@ static struct platform_device omap3isp_device = { }; static struct omap_iommu_arch_data omap3_isp_iommu = { - .name = "isp", + .name = "mmu_isp", }; int omap3_init_camera(struct isp_platform_data *pdata) diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index a6a4ff8744b7..02726a647b1d 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -12,153 +12,61 @@ #include #include +#include +#include #include +#include +#include -#include "soc.h" -#include "common.h" - -struct iommu_device { - resource_size_t base; - int irq; - struct iommu_platform_data pdata; - struct resource res[2]; -}; -static struct iommu_device *devices; -static int num_iommu_devices; - -#ifdef CONFIG_ARCH_OMAP3 -static struct iommu_device omap3_devices[] = { - { - .base = 0x480bd400, - .irq = 24 + OMAP_INTC_START, - .pdata = { - .name = "isp", - .nr_tlb_entries = 8, - .clk_name = "cam_ick", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -#if defined(CONFIG_OMAP_IOMMU_IVA2) - { - .base = 0x5d000000, - .irq = 28 + OMAP_INTC_START, - .pdata = { - .name = "iva2", - .nr_tlb_entries = 32, - .clk_name = "iva2_ck", - .da_start = 0x11000000, - .da_end = 0xFFFFF000, - }, - }, -#endif -}; -#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices) -static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES]; -#else -#define omap3_devices NULL -#define NR_OMAP3_IOMMU_DEVICES 0 -#define omap3_iommu_pdev NULL -#endif - -#ifdef CONFIG_ARCH_OMAP4 -static struct iommu_device omap4_devices[] = { - { - .base = OMAP4_MMU1_BASE, - .irq = 100 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "ducati", - .nr_tlb_entries = 32, - .clk_name = "ipu_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, - { - .base = OMAP4_MMU2_BASE, - .irq = 28 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "tesla", - .nr_tlb_entries = 32, - .clk_name = "dsp_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -}; -#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices) -static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES]; -#else -#define omap4_devices NULL -#define NR_OMAP4_IOMMU_DEVICES 0 -#define omap4_iommu_pdev NULL -#endif - -static struct platform_device **omap_iommu_pdev; - -static int __init omap_iommu_init(void) +static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) { - int i, err; - struct resource res[] = { - { .flags = IORESOURCE_MEM }, - { .flags = IORESOURCE_IRQ }, - }; + struct platform_device *pdev; + struct iommu_platform_data *pdata; + struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr; + static int i; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->name = oh->name; + pdata->clk_name = oh->main_clk; + pdata->nr_tlb_entries = a->nr_tlb_entries; + pdata->da_start = a->da_start; + pdata->da_end = a->da_end; + + if (oh->rst_lines_cnt == 1) { + pdata->reset_name = oh->rst_lines->name; + pdata->assert_reset = omap_device_assert_hardreset; + pdata->deassert_reset = omap_device_deassert_hardreset; + } - if (cpu_is_omap34xx()) { - devices = omap3_devices; - omap_iommu_pdev = omap3_iommu_pdev; - num_iommu_devices = NR_OMAP3_IOMMU_DEVICES; - } else if (cpu_is_omap44xx()) { - devices = omap4_devices; - omap_iommu_pdev = omap4_iommu_pdev; - num_iommu_devices = NR_OMAP4_IOMMU_DEVICES; - } else - return -ENODEV; + pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata), + NULL, 0, 0); - for (i = 0; i < num_iommu_devices; i++) { - struct platform_device *pdev; - const struct iommu_device *d = &devices[i]; + kfree(pdata); - pdev = platform_device_alloc("omap-iommu", i); - if (!pdev) { - err = -ENOMEM; - goto err_out; - } + if (IS_ERR(pdev)) { + pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev)); + return PTR_ERR(pdev); + } - res[0].start = d->base; - res[0].end = d->base + MMU_REG_SIZE - 1; - res[1].start = res[1].end = d->irq; + i++; - err = platform_device_add_resources(pdev, res, - ARRAY_SIZE(res)); - if (err) - goto err_out; - err = platform_device_add_data(pdev, &d->pdata, - sizeof(d->pdata)); - if (err) - goto err_out; - err = platform_device_add(pdev); - if (err) - goto err_out; - omap_iommu_pdev[i] = pdev; - } return 0; +} -err_out: - while (i--) - platform_device_put(omap_iommu_pdev[i]); - return err; +static int __init omap_iommu_init(void) +{ + return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL); } /* must be ready before omap3isp is probed */ subsys_initcall(omap_iommu_init); static void __exit omap_iommu_exit(void) { - int i; - - for (i = 0; i < num_iommu_devices; i++) - platform_device_unregister(omap_iommu_pdev[i]); + /* Do nothing */ } module_exit(omap_iommu_exit); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index f8082da6179b..af9b4f31f594 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -143,13 +143,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version); static int iommu_enable(struct omap_iommu *obj) { int err; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (!obj) + if (!obj || !pdata) return -EINVAL; if (!arch_iommu) return -ENODEV; + if (pdata->deassert_reset) { + err = pdata->deassert_reset(pdev, pdata->reset_name); + if (err) { + dev_err(obj->dev, "deassert_reset failed: %d\n", err); + return err; + } + } + clk_enable(obj->clk); err = arch_iommu->enable(obj); @@ -159,12 +169,18 @@ static int iommu_enable(struct omap_iommu *obj) static void iommu_disable(struct omap_iommu *obj) { - if (!obj) + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; + + if (!obj || !pdata) return; arch_iommu->disable(obj); clk_disable(obj->clk); + + if (pdata->assert_reset) + pdata->assert_reset(pdev, pdata->reset_name); } /* @@ -926,9 +942,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (pdev->num_resources != 2) - return -EINVAL; - obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index c02020292377..4a3a1c7a38c1 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -35,12 +35,8 @@ #define MMU_SYS_IDLE_SMART (2 << MMU_SYS_IDLE_SHIFT) #define MMU_SYS_IDLE_MASK (3 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_SOFTRESET (1 << 1) #define MMU_SYS_AUTOIDLE 1 -/* SYSSTATUS */ -#define MMU_SYS_RESETDONE 1 - /* IRQSTATUS & IRQENABLE */ #define MMU_IRQ_MULTIHITFAULT (1 << 4) #define MMU_IRQ_TABLEWALKFAULT (1 << 3) @@ -97,7 +93,6 @@ static void __iommu_set_twl(struct omap_iommu *obj, bool on) static int omap2_iommu_enable(struct omap_iommu *obj) { u32 l, pa; - unsigned long timeout; if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K)) return -EINVAL; @@ -106,20 +101,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj) if (!IS_ALIGNED(pa, SZ_16K)) return -EINVAL; - iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG); - - timeout = jiffies + msecs_to_jiffies(20); - do { - l = iommu_read_reg(obj, MMU_SYSSTATUS); - if (l & MMU_SYS_RESETDONE) - break; - } while (!time_after(jiffies, timeout)); - - if (!(l & MMU_SYS_RESETDONE)) { - dev_err(obj->dev, "can't take mmu out of reset\n"); - return -ENODEV; - } - l = iommu_read_reg(obj, MMU_REVISION); dev_info(obj->dev, "%s: version %d.%d\n", obj->name, (l >> 4) & 0xf, l & 0xf); diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index c677b9f2fefa..ef2060d7eeb8 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#include + #define MMU_REG_SIZE 256 /** @@ -43,7 +45,11 @@ struct omap_mmu_dev_attr { struct iommu_platform_data { const char *name; const char *clk_name; - const int nr_tlb_entries; + const char *reset_name; + int nr_tlb_entries; u32 da_start; u32 da_end; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); }; -- cgit v1.2.3 From ebf7cda0f92effd8169b831fae81e9437dce1fef Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Mon, 19 Nov 2012 19:05:51 -0600 Subject: iommu/omap: Adapt to runtime pm Use runtime PM functionality interfaced with hwmod enable/idle functions, to replace direct clock operations and sysconfig handling. Due to reset sequence, pm_runtime_[get|put]_sync must be used, to avoid possible operations with the module under reset. Because of this and given that the driver uses spin_locks to protect their critical sections, we must use pm_runtime_irq_safe in order for the runtime ops to be happy, otherwise might_sleep_if checks in runtime framework will complain. The remaining pm_runtime out of iommu_enable and iommu_disable corresponds to paths that can be accessed through debugfs, some of them doesn't work if the module is not enabled first, but in future if the mmu is idled withouth freeing, these are needed to debug. Signed-off-by: Omar Ramirez Luna Tested-by: Ohad Ben-Cohen Acked-by: Tony Lindgren Signed-off-by: Joerg Roedel --- arch/arm/mach-omap2/omap-iommu.c | 1 - drivers/iommu/omap-iommu.c | 40 +++++++++++++++----------------- drivers/iommu/omap-iommu.h | 3 --- drivers/iommu/omap-iommu2.c | 17 -------------- include/linux/platform_data/iommu-omap.h | 1 - 5 files changed, 19 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index 02726a647b1d..7642fc4672c1 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -31,7 +31,6 @@ static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) return -ENOMEM; pdata->name = oh->name; - pdata->clk_name = oh->main_clk; pdata->nr_tlb_entries = a->nr_tlb_entries; pdata->da_start = a->da_start; pdata->da_end = a->da_end; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index af9b4f31f594..18108c1405e2 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -16,13 +16,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include @@ -160,7 +160,7 @@ static int iommu_enable(struct omap_iommu *obj) } } - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); err = arch_iommu->enable(obj); @@ -177,7 +177,7 @@ static void iommu_disable(struct omap_iommu *obj) arch_iommu->disable(obj); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); if (pdata->assert_reset) pdata->assert_reset(pdev, pdata->reset_name); @@ -303,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) if (!obj || !obj->nr_tlb_entries || !e) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &l); if (l.base == obj->nr_tlb_entries) { @@ -333,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) cr = iotlb_alloc_cr(obj, e); if (IS_ERR(cr)) { - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return PTR_ERR(cr); } @@ -347,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) l.vict = l.base; iotlb_lock_set(obj, &l); out: - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return err; } @@ -377,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) int i; struct cr_regs cr; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) { u32 start; @@ -396,7 +396,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); } } - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); if (i == obj->nr_tlb_entries) dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); @@ -410,7 +410,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) { struct iotlb_lock l; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); l.base = 0; l.vict = 0; @@ -418,7 +418,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) iommu_write_reg(obj, 1, MMU_GFLUSH); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); } #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) @@ -428,11 +428,11 @@ ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) if (!obj || !buf) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); bytes = arch_iommu->dump_ctx(obj, buf, bytes); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return bytes; } @@ -446,7 +446,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) struct cr_regs tmp; struct cr_regs *p = crs; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &saved); for_each_iotlb_cr(obj, num, i, tmp) { @@ -456,7 +456,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) } iotlb_lock_set(obj, &saved); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return p - crs; } @@ -946,10 +946,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) if (!obj) return -ENOMEM; - obj->clk = clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(obj->clk)) - goto err_clk; - obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; obj->dev = &pdev->dev; @@ -992,6 +988,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) goto err_irq; platform_set_drvdata(pdev, obj); + pm_runtime_irq_safe(obj->dev); + pm_runtime_enable(obj->dev); + dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; @@ -1000,8 +999,6 @@ err_irq: err_ioremap: release_mem_region(res->start, resource_size(res)); err_mem: - clk_put(obj->clk); -err_clk: kfree(obj); return err; } @@ -1022,7 +1019,8 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev) release_mem_region(res->start, resource_size(res)); iounmap(obj->regbase); - clk_put(obj->clk); + pm_runtime_disable(obj->dev); + dev_info(&pdev->dev, "%s removed\n", obj->name); kfree(obj); return 0; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 2b5f3c04d167..120084206602 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -29,7 +29,6 @@ struct iotlb_entry { struct omap_iommu { const char *name; struct module *owner; - struct clk *clk; void __iomem *regbase; struct device *dev; void *isr_priv; @@ -116,8 +115,6 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) * MMU Register offsets */ #define MMU_REVISION 0x00 -#define MMU_SYSCONFIG 0x10 -#define MMU_SYSSTATUS 0x14 #define MMU_IRQSTATUS 0x18 #define MMU_IRQENABLE 0x1c #define MMU_WALKING_ST 0x40 diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index 4a3a1c7a38c1..d745094a69dd 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -28,15 +28,6 @@ */ #define IOMMU_ARCH_VERSION 0x00000011 -/* SYSCONF */ -#define MMU_SYS_IDLE_SHIFT 3 -#define MMU_SYS_IDLE_FORCE (0 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_NONE (1 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_SMART (2 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_MASK (3 << MMU_SYS_IDLE_SHIFT) - -#define MMU_SYS_AUTOIDLE 1 - /* IRQSTATUS & IRQENABLE */ #define MMU_IRQ_MULTIHITFAULT (1 << 4) #define MMU_IRQ_TABLEWALKFAULT (1 << 3) @@ -105,11 +96,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj) dev_info(obj->dev, "%s: version %d.%d\n", obj->name, (l >> 4) & 0xf, l & 0xf); - l = iommu_read_reg(obj, MMU_SYSCONFIG); - l &= ~MMU_SYS_IDLE_MASK; - l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE); - iommu_write_reg(obj, l, MMU_SYSCONFIG); - iommu_write_reg(obj, pa, MMU_TTB); __iommu_set_twl(obj, true); @@ -123,7 +109,6 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); - iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -252,8 +237,6 @@ omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) char *p = buf; pr_reg(REVISION); - pr_reg(SYSCONFIG); - pr_reg(SYSSTATUS); pr_reg(IRQSTATUS); pr_reg(IRQENABLE); pr_reg(WALKING_ST); diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index ef2060d7eeb8..5b429c43a297 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -44,7 +44,6 @@ struct omap_mmu_dev_attr { struct iommu_platform_data { const char *name; - const char *clk_name; const char *reset_name; int nr_tlb_entries; u32 da_start; -- cgit v1.2.3 From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Dec 2012 16:11:13 -0500 Subject: svcrpc: don't byte-swap sk_reclen in place Byte-swapping in place is always a little dubious. Let's instead define this field to always be big-endian, and do the swapping on demand where we need it. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 12 +++++++++++- net/sunrpc/svcsock.c | 26 +++++++++++--------------- 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 92ad02f0dcc0..613cf42227aa 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,11 +26,21 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - u32 sk_reclen; /* length of record */ + __be32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; +static inline u32 svc_sock_reclen(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; +} + +static inline u32 svc_sock_final_rec(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; +} + /* * Function prototypes. */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 03827cef1fa7..d50de2b95036 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -950,8 +950,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } - svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { + if (!(svc_sock_final_rec(svsk))) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. @@ -961,21 +960,18 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) goto err_delete; } - svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; - dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); - if (svsk->sk_reclen > serv->sv_max_mesg) { + dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + if (svc_sock_reclen(svsk) > serv->sv_max_mesg) { net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n", - (unsigned long)svsk->sk_reclen); + (unsigned long)svc_sock_reclen(svsk)); goto err_delete; } } - if (svsk->sk_reclen < 8) + if (svc_sock_reclen(svsk) < 8) goto err_delete; /* client is nuts. */ - len = svsk->sk_reclen; - - return len; + return svc_sock_reclen(svsk); error: dprintk("RPC: TCP recv_record got %d\n", len); return len; @@ -1019,7 +1015,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (dst->iov_len < src->iov_len) return -EAGAIN; /* whatever; just giving up. */ memcpy(dst->iov_base, src->iov_base, src->iov_len); - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; return 0; } @@ -1064,12 +1060,12 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svsk->sk_reclen - base; + want = svc_sock_reclen(svsk) - base; vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], - svsk->sk_reclen); + svc_sock_reclen(svsk)); rqstp->rq_respages = &rqstp->rq_pages[pnum]; @@ -1082,11 +1078,11 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0 && len != -EAGAIN) goto err_other; dprintk("svc: incomplete TCP record (%d of %d)\n", - svsk->sk_tcplen, svsk->sk_reclen); + svsk->sk_tcplen, svc_sock_reclen(svsk)); goto err_noclose; } - rqstp->rq_arg.len = svsk->sk_reclen; + rqstp->rq_arg.len = svc_sock_reclen(svsk); rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; -- cgit v1.2.3 From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Dec 2012 16:45:35 -0500 Subject: svcrpc: track rpc data length separately from sk_tcplen Keep a separate field, sk_datalen, that tracks only the data contained in a fragment, not including the fragment header. For now, this is always just max(0, sk_tcplen - 4), but after we allow multiple fragments sk_datalen will accumulate the total rpc data size while sk_tcplen only tracks progress receiving the current fragment. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 11 +++++++++-- net/sunrpc/svcsock.c | 19 ++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 613cf42227aa..62fd1b756e99 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,8 +26,15 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - __be32 sk_reclen; /* length of record */ - u32 sk_tcplen; /* current read length */ + /* On-the-wire fragment header: */ + __be32 sk_reclen; + /* As we receive a record, this includes the length received so + * far (including the fragment header): */ + u32 sk_tcplen; + /* Total length of the data (not including fragment headers) + * received so far in the fragments making up this rpc: */ + u32 sk_datalen; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1db42b1ffe28..2b09e2306bfa 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -874,9 +874,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return 0; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -893,9 +893,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { svsk->sk_pages[i] = rqstp->rq_pages[i]; @@ -907,9 +907,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) goto out; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { BUG_ON(svsk->sk_pages[i] == NULL); @@ -918,6 +918,7 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) } out: svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; } /* @@ -1066,8 +1067,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Now receive data */ len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); - if (len >= 0) + if (len >= 0) { svsk->sk_tcplen += len; + svsk->sk_datalen += len; + } if (len != want) { svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) @@ -1100,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; /* If we have more data, signal svc_xprt_enqueue() to try again */ if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); @@ -1296,6 +1300,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; -- cgit v1.2.3 From 8fa72d234da9b6b473bbb1f74d533663e4996e6b Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 5 Dec 2012 20:17:21 +0100 Subject: bdi: add a user-tunable cpu_list for the bdi flusher threads In realtime environments, it may be desirable to keep the per-bdi flusher threads from running on certain cpus. This patch adds a cpu_list file to /sys/class/bdi/* to enable this. The default is to tie the flusher threads to the same numa node as the backing device (though I could be convinced to make it a mask of all cpus to avoid a change in behaviour). Thanks to Jeremy Eder for the original idea. Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 4 +++ mm/backing-dev.c | 84 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2a9a9abc9126..238521a19849 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include #include #include +#include struct page; struct device; @@ -105,6 +106,9 @@ struct backing_dev_info { struct timer_list laptop_mode_wb_timer; + cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */ + struct mutex flusher_cpumask_lock; + #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; struct dentry *debug_stats; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca2b3ee176..bd6a6cabef71 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,6 +10,7 @@ #include #include #include +#include #include static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); @@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) +static ssize_t cpu_list_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + struct bdi_writeback *wb = &bdi->wb; + cpumask_var_t newmask; + ssize_t ret; + struct task_struct *task; + + if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpulist_parse(buf, newmask); + if (!ret) { + spin_lock_bh(&bdi->wb_lock); + task = wb->task; + if (task) + get_task_struct(task); + spin_unlock_bh(&bdi->wb_lock); + + mutex_lock(&bdi->flusher_cpumask_lock); + if (task) { + ret = set_cpus_allowed_ptr(task, newmask); + put_task_struct(task); + } + if (ret == 0) { + cpumask_copy(bdi->flusher_cpumask, newmask); + ret = count; + } + mutex_unlock(&bdi->flusher_cpumask_lock); + + } + free_cpumask_var(newmask); + + return ret; +} + +static ssize_t cpu_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&bdi->flusher_cpumask_lock); + ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + + return ret; +} + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), + __ATTR_RW(cpu_list), __ATTR_NULL, }; @@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr) writeback_inodes_wb(&bdi->wb, 1024, WB_REASON_FORKER_THREAD); } else { + int ret; /* * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. @@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr) spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; spin_unlock_bh(&bdi->wb_lock); + mutex_lock(&bdi->flusher_cpumask_lock); + ret = set_cpus_allowed_ptr(task, + bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + if (ret) + printk_once("%s: failed to bind flusher" + " thread %s, error %d\n", + __func__, task->comm, ret); wake_up_process(task); } bdi_clear_pending(bdi); @@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, dev_name(dev)); if (IS_ERR(wb->task)) return PTR_ERR(wb->task); + } else { + int node; + /* + * Set up a default cpumask for the flusher threads that + * includes all cpus on the same numa node as the device. + * The mask may be overridden via sysfs. + */ + node = dev_to_node(bdi->dev); + if (node != NUMA_NO_NODE) + cpumask_copy(bdi->flusher_cpumask, + cpumask_of_node(node)); } bdi_debug_register(bdi, dev_name(dev)); @@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); + if (!bdi_cap_flush_forker(bdi)) { + bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!bdi->flusher_cpumask) + return -ENOMEM; + cpumask_setall(bdi->flusher_cpumask); + mutex_init(&bdi->flusher_cpumask_lock); + } else + bdi->flusher_cpumask = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init(&bdi->bdi_stat[i], 0); if (err) @@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi) err: while (i--) percpu_counter_destroy(&bdi->bdi_stat[i]); + kfree(bdi->flusher_cpumask); } return err; @@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); + kfree(bdi->flusher_cpumask); + /* * If bdi_unregister() had already been called earlier, the * wakeup_timer could still be armed because bdi_prune_sb() -- cgit v1.2.3 From 161b96c383c442f4d7dabbb8500a5fbd551b344d Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 7 Nov 2012 21:30:29 +0400 Subject: spi/clps711x: New SPI master driver This patch add new driver for CLPS711X SPI master controller. Due to platform limitations driver supports only 8 bit transfer mode. Chip select control is handled via GPIO. Signed-off-by: Alexander Shiyan Acked-by: Arnd Bergmann Signed-off-by: Grant Likely --- drivers/spi/Kconfig | 7 + drivers/spi/Makefile | 1 + drivers/spi/spi-clps711x.c | 296 +++++++++++++++++++++++++++++ include/linux/platform_data/spi-clps711x.h | 21 ++ 4 files changed, 325 insertions(+) create mode 100644 drivers/spi/spi-clps711x.c create mode 100644 include/linux/platform_data/spi-clps711x.h (limited to 'include/linux') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 1acae359cabe..2a13e637e46b 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -123,6 +123,13 @@ config SPI_BUTTERFLY inexpensive battery powered microcontroller evaluation board. This same cable can be used to flash new firmware. +config SPI_CLPS711X + tristate "CLPS711X host SPI controller" + depends on ARCH_CLPS711X + help + This enables dedicated general purpose SPI/Microwire1-compatible + master mode interface (SSI1) for CLPS711X-based CPUs. + config SPI_COLDFIRE_QSPI tristate "Freescale Coldfire QSPI controller" depends on (M520x || M523x || M5249 || M525x || M527x || M528x || M532x) diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index c48df47e4b0f..25ab4a1cd014 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_SPI_BFIN5XX) += spi-bfin5xx.o obj-$(CONFIG_SPI_BFIN_SPORT) += spi-bfin-sport.o obj-$(CONFIG_SPI_BITBANG) += spi-bitbang.o obj-$(CONFIG_SPI_BUTTERFLY) += spi-butterfly.o +obj-$(CONFIG_SPI_CLPS711X) += spi-clps711x.o obj-$(CONFIG_SPI_COLDFIRE_QSPI) += spi-coldfire-qspi.o obj-$(CONFIG_SPI_DAVINCI) += spi-davinci.o obj-$(CONFIG_SPI_DESIGNWARE) += spi-dw.o diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c new file mode 100644 index 000000000000..59677eb30f94 --- /dev/null +++ b/drivers/spi/spi-clps711x.c @@ -0,0 +1,296 @@ +/* + * CLPS711X SPI bus driver + * + * Copyright (C) 2012 Alexander Shiyan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "spi-clps711x" + +struct spi_clps711x_data { + struct completion done; + + struct clk *spi_clk; + u32 max_speed_hz; + + u8 *tx_buf; + u8 *rx_buf; + int count; + int len; + + int chipselect[0]; +}; + +static int spi_clps711x_setup(struct spi_device *spi) +{ + struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master); + + if (spi->bits_per_word != 8) { + dev_err(&spi->dev, "Unsupported master bus width %i\n", + spi->bits_per_word); + return -EINVAL; + } + + /* We are expect that SPI-device is not selected */ + gpio_direction_output(hw->chipselect[spi->chip_select], + !(spi->mode & SPI_CS_HIGH)); + + return 0; +} + +static void spi_clps711x_setup_mode(struct spi_device *spi) +{ + /* Setup edge for transfer */ + if (spi->mode & SPI_CPHA) + clps_writew(clps_readw(SYSCON3) | SYSCON3_ADCCKNSEN, SYSCON3); + else + clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCKNSEN, SYSCON3); +} + +static int spi_clps711x_setup_xfer(struct spi_device *spi, + struct spi_transfer *xfer) +{ + u32 speed = xfer->speed_hz ? : spi->max_speed_hz; + u8 bpw = xfer->bits_per_word ? : spi->bits_per_word; + struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master); + + if (bpw != 8) { + dev_err(&spi->dev, "Unsupported master bus width %i\n", bpw); + return -EINVAL; + } + + /* Setup SPI frequency divider */ + if (!speed || (speed >= hw->max_speed_hz)) + clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) | + SYSCON1_ADCKSEL(3), SYSCON1); + else if (speed >= (hw->max_speed_hz / 2)) + clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) | + SYSCON1_ADCKSEL(2), SYSCON1); + else if (speed >= (hw->max_speed_hz / 8)) + clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) | + SYSCON1_ADCKSEL(1), SYSCON1); + else + clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) | + SYSCON1_ADCKSEL(0), SYSCON1); + + return 0; +} + +static int spi_clps711x_transfer_one_message(struct spi_master *master, + struct spi_message *msg) +{ + struct spi_clps711x_data *hw = spi_master_get_devdata(master); + struct spi_transfer *xfer; + int status = 0, cs = hw->chipselect[msg->spi->chip_select]; + u32 data; + + spi_clps711x_setup_mode(msg->spi); + + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (spi_clps711x_setup_xfer(msg->spi, xfer)) { + status = -EINVAL; + goto out_xfr; + } + + gpio_set_value(cs, !!(msg->spi->mode & SPI_CS_HIGH)); + + INIT_COMPLETION(hw->done); + + hw->count = 0; + hw->len = xfer->len; + hw->tx_buf = (u8 *)xfer->tx_buf; + hw->rx_buf = (u8 *)xfer->rx_buf; + + /* Initiate transfer */ + data = hw->tx_buf ? hw->tx_buf[hw->count] : 0; + clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO); + + wait_for_completion(&hw->done); + + if (xfer->delay_usecs) + udelay(xfer->delay_usecs); + + if (xfer->cs_change || + list_is_last(&xfer->transfer_list, &msg->transfers)) + gpio_set_value(cs, !(msg->spi->mode & SPI_CS_HIGH)); + + msg->actual_length += xfer->len; + } + +out_xfr: + msg->status = status; + spi_finalize_current_message(master); + + return 0; +} + +static irqreturn_t spi_clps711x_isr(int irq, void *dev_id) +{ + struct spi_clps711x_data *hw = (struct spi_clps711x_data *)dev_id; + u32 data; + + /* Handle RX */ + data = clps_readb(SYNCIO); + if (hw->rx_buf) + hw->rx_buf[hw->count] = (u8)data; + + hw->count++; + + /* Handle TX */ + if (hw->count < hw->len) { + data = hw->tx_buf ? hw->tx_buf[hw->count] : 0; + clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO); + } else + complete(&hw->done); + + return IRQ_HANDLED; +} + +static int __devinit spi_clps711x_probe(struct platform_device *pdev) +{ + int i, ret; + struct spi_master *master; + struct spi_clps711x_data *hw; + struct spi_clps711x_pdata *pdata = dev_get_platdata(&pdev->dev); + + if (!pdata) { + dev_err(&pdev->dev, "No platform data supplied\n"); + return -EINVAL; + } + + if (pdata->num_chipselect < 1) { + dev_err(&pdev->dev, "At least one CS must be defined\n"); + return -EINVAL; + } + + master = spi_alloc_master(&pdev->dev, + sizeof(struct spi_clps711x_data) + + sizeof(int) * pdata->num_chipselect); + if (!master) { + dev_err(&pdev->dev, "SPI allocating memory error\n"); + return -ENOMEM; + } + + master->bus_num = pdev->id; + master->mode_bits = SPI_CPHA | SPI_CS_HIGH; + master->num_chipselect = pdata->num_chipselect; + master->setup = spi_clps711x_setup; + master->transfer_one_message = spi_clps711x_transfer_one_message; + + hw = spi_master_get_devdata(master); + + for (i = 0; i < master->num_chipselect; i++) { + hw->chipselect[i] = pdata->chipselect[i]; + if (!gpio_is_valid(hw->chipselect[i])) { + dev_err(&pdev->dev, "Invalid CS GPIO %i\n", i); + ret = -EINVAL; + goto err_out; + } + if (gpio_request(hw->chipselect[i], DRIVER_NAME)) { + dev_err(&pdev->dev, "Can't get CS GPIO %i\n", i); + ret = -EINVAL; + goto err_out; + } + } + + hw->spi_clk = devm_clk_get(&pdev->dev, "spi"); + if (IS_ERR(hw->spi_clk)) { + dev_err(&pdev->dev, "Can't get clocks\n"); + ret = PTR_ERR(hw->spi_clk); + goto err_out; + } + hw->max_speed_hz = clk_get_rate(hw->spi_clk); + + init_completion(&hw->done); + platform_set_drvdata(pdev, master); + + /* Disable extended mode due hardware problems */ + clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCON, SYSCON3); + + /* Clear possible pending interrupt */ + clps_readl(SYNCIO); + + ret = devm_request_irq(&pdev->dev, IRQ_SSEOTI, spi_clps711x_isr, 0, + dev_name(&pdev->dev), hw); + if (ret) { + dev_err(&pdev->dev, "Can't request IRQ\n"); + clk_put(hw->spi_clk); + goto clk_out; + } + + ret = spi_register_master(master); + if (!ret) { + dev_info(&pdev->dev, + "SPI bus driver initialized. Master clock %u Hz\n", + hw->max_speed_hz); + return 0; + } + + dev_err(&pdev->dev, "Failed to register master\n"); + devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw); + +clk_out: + devm_clk_put(&pdev->dev, hw->spi_clk); + +err_out: + while (--i >= 0) + if (gpio_is_valid(hw->chipselect[i])) + gpio_free(hw->chipselect[i]); + + platform_set_drvdata(pdev, NULL); + spi_master_put(master); + kfree(master); + + return ret; +} + +static int __devexit spi_clps711x_remove(struct platform_device *pdev) +{ + int i; + struct spi_master *master = platform_get_drvdata(pdev); + struct spi_clps711x_data *hw = spi_master_get_devdata(master); + + devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw); + + for (i = 0; i < master->num_chipselect; i++) + if (gpio_is_valid(hw->chipselect[i])) + gpio_free(hw->chipselect[i]); + + devm_clk_put(&pdev->dev, hw->spi_clk); + platform_set_drvdata(pdev, NULL); + spi_unregister_master(master); + kfree(master); + + return 0; +} + +static struct platform_driver clps711x_spi_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = spi_clps711x_probe, + .remove = __devexit_p(spi_clps711x_remove), +}; +module_platform_driver(clps711x_spi_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Alexander Shiyan "); +MODULE_DESCRIPTION("CLPS711X SPI bus driver"); diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h new file mode 100644 index 000000000000..301956e63143 --- /dev/null +++ b/include/linux/platform_data/spi-clps711x.h @@ -0,0 +1,21 @@ +/* + * CLPS711X SPI bus driver definitions + * + * Copyright (C) 2012 Alexander Shiyan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H +#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H + +/* Board specific platform_data */ +struct spi_clps711x_pdata { + int *chipselect; /* Array of GPIO-numbers */ + int num_chipselect; /* Total count of GPIOs */ +}; + +#endif -- cgit v1.2.3 From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 12:49:27 -0500 Subject: NFSv4.1: Ensure that the client tracks the server target_highest_slotid Dynamic slot allocation in NFSv4.1 depends on the client being able to track the server's target value for the highest slotid in the slot table. See the reference in Section 2.10.6.1 of RFC5661. To avoid ordering problems in the case where 2 SEQUENCE replies contain conflicting updates to this target value, we also introduce a generation counter, to track whether or not an RPC containing a SEQUENCE operation was launched before or after the last update. Also rename the nfs4_slot_table target_max_slots field to 'target_highest_slotid' to avoid confusion with a slot table size or number of slots. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/nfs4proc.c | 25 +++++++++++++++++++++++++ fs/nfs/nfs4state.c | 7 +++---- fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_fs_sb.h | 5 +++-- include/linux/nfs_xdr.h | 2 ++ 6 files changed, 36 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0be08b964f38..0ef047b7d28d 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -576,7 +576,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (args->crsa_target_max_slots == fc_tbl->max_slots) goto out; - fc_tbl->target_max_slots = args->crsa_target_max_slots; + fc_tbl->target_highest_slotid = args->crsa_target_max_slots; nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 197ef3e4e1f7..d91abaa522e8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -488,6 +488,28 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) res->sr_slot = NULL; } +/* Update the client's idea of target_highest_slotid */ +static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + if (tbl->target_highest_slotid == target_highest_slotid) + return; + tbl->target_highest_slotid = target_highest_slotid; + tbl->generation++; +} + +static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res) +{ + spin_lock(&tbl->slot_tbl_lock); + if (tbl->generation != slot->generation) + goto out; + nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); +out: + spin_unlock(&tbl->slot_tbl_lock); +} + static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; @@ -522,6 +544,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * /* Check sequence flags */ if (res->sr_status_flags != 0) nfs4_schedule_lease_recovery(clp); + nfs41_update_target_slotid(slot->table, slot, res); break; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -583,6 +606,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) tbl->highest_used_slotid = slotid; ret = &tbl->slots[slotid]; ret->renewal_time = jiffies; + ret->generation = tbl->generation; out: dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", @@ -5693,6 +5717,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->max_slots = max_slots; } tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->target_highest_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9495789c425b..842cb8c2f65d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2033,17 +2033,16 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; nfs4_begin_drain_session(clp); fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS); + new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS); if (!new) return -ENOMEM; spin_lock(&fc_tbl->slot_tbl_lock); - for (i = 0; i < fc_tbl->target_max_slots; i++) + for (i = 0; i <= fc_tbl->target_highest_slotid; i++) new[i].seq_nr = fc_tbl->slots[i].seq_nr; old = fc_tbl->slots; fc_tbl->slots = new; - fc_tbl->max_slots = fc_tbl->target_max_slots; - fc_tbl->target_max_slots = 0; + fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 27b0fec1a6b0..05d34f1fcc19 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5552,8 +5552,8 @@ static int decode_sequence(struct xdr_stream *xdr, } /* highest slot id - currently not processed */ dummy = be32_to_cpup(p++); - /* target highest slot id - currently not processed */ - dummy = be32_to_cpup(p++); + /* target highest slot id */ + res->sr_target_highest_slotid = be32_to_cpup(p++); /* result flags */ res->sr_status_flags = be32_to_cpup(p); status = 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b0412873d29c..57d406997def 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -217,8 +217,9 @@ struct nfs4_slot_table { u32 max_slots; /* # slots in table */ u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ - u32 target_max_slots; /* Set by CB_RECALL_SLOT as - * the new max_slots */ + u32 target_highest_slotid; /* Server max_slot target */ + unsigned long generation; /* Generation counter for + target_highest_slotid */ struct completion complete; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index deb31bbbb857..08c47db7417f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,6 +188,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { struct nfs4_slot_table *table; + unsigned long generation; unsigned long renewal_time; u32 slot_nr; u32 seq_nr; @@ -202,6 +203,7 @@ struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; + u32 sr_target_highest_slotid; }; struct nfs4_get_lease_time_args { -- cgit v1.2.3 From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 18:10:30 -0500 Subject: NFSv4.1: Reset the sequence number for slots that have been deallocated When the server tells us that it is dynamically resizing the session replay cache, we should reset the sequence number for those slots that have been deallocated. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 ++++++++++++++++++ fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d91abaa522e8..52435ec44193 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -498,6 +498,22 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->generation++; } +static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, + u32 highest_slotid) +{ + unsigned int max_slotid, i; + + if (tbl->server_highest_slotid == highest_slotid) + return; + if (tbl->highest_used_slotid > highest_slotid) + return; + max_slotid = min(tbl->max_slots - 1, highest_slotid); + /* Reset the seq_nr for deallocated slots */ + for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++) + tbl->slots[i].seq_nr = 1; + tbl->server_highest_slotid = highest_slotid; +} + static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res) @@ -505,6 +521,7 @@ static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_lock(&tbl->slot_tbl_lock); if (tbl->generation != slot->generation) goto out; + nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); out: spin_unlock(&tbl->slot_tbl_lock); @@ -5718,6 +5735,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, } tbl->highest_used_slotid = NFS4_NO_SLOT; tbl->target_highest_slotid = max_slots - 1; + tbl->server_highest_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 05d34f1fcc19..a67040f51597 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5550,8 +5550,8 @@ static int decode_sequence(struct xdr_stream *xdr, dprintk("%s Invalid slot id\n", __func__); goto out_err; } - /* highest slot id - currently not processed */ - dummy = be32_to_cpup(p++); + /* highest slot id */ + res->sr_highest_slotid = be32_to_cpup(p++); /* target highest slot id */ res->sr_target_highest_slotid = be32_to_cpup(p++); /* result flags */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 57d406997def..646e64bbff4c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -218,6 +218,7 @@ struct nfs4_slot_table { u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ u32 target_highest_slotid; /* Server max_slot target */ + u32 server_highest_slotid; /* Server highest slotid */ unsigned long generation; /* Generation counter for target_highest_slotid */ struct completion complete; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 08c47db7417f..3ddb08fba935 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -203,6 +203,7 @@ struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ int sr_status; /* sequence operation status */ u32 sr_status_flags; + u32 sr_highest_slotid; u32 sr_target_highest_slotid; }; -- cgit v1.2.3 From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 14:45:48 -0500 Subject: NFSv4.1: Support dynamic resizing of the session slot table Allow the server to control the size of the session slot table by adjusting the value of sr_target_max_slots in the reply to the SEQUENCE operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 ++++++++++-- fs/nfs/nfs4state.c | 6 +++--- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62212231ce62..1792ece8b53c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -492,10 +492,17 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { + unsigned int max_slotid, i; + if (tbl->target_highest_slotid == target_highest_slotid) return; tbl->target_highest_slotid = target_highest_slotid; tbl->generation++; + + max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid); + for (i = tbl->max_slotid + 1; i <= max_slotid; i++) + rpc_wake_up_next(&tbl->slot_tbl_waitq); + tbl->max_slotid = max_slotid; } void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, @@ -622,8 +629,8 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, tbl->max_slots); - slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); - if (slotid >= tbl->max_slots) + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); + if (slotid > tbl->max_slotid) goto out; __set_bit(slotid, tbl->used_slots); if (slotid > tbl->highest_used_slotid || @@ -5744,6 +5751,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->highest_used_slotid = NFS4_NO_SLOT; tbl->target_highest_slotid = max_slots - 1; tbl->server_highest_slotid = max_slots - 1; + tbl->max_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 842cb8c2f65d..1b7fa73c9436 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -254,15 +254,14 @@ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; struct nfs4_slot_table *tbl; - int max_slots; + unsigned int i; if (ses == NULL) return; tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { spin_lock(&tbl->slot_tbl_lock); - max_slots = tbl->max_slots; - while (max_slots--) { + for (i = 0; i <= tbl->max_slotid; i++) { if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs4_set_task_privileged, NULL) == NULL) @@ -2043,6 +2042,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) old = fc_tbl->slots; fc_tbl->slots = new; fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; + fc_tbl->max_slotid = fc_tbl->target_highest_slotid; clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 646e64bbff4c..30715508fade 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -215,6 +215,7 @@ struct nfs4_slot_table { spinlock_t slot_tbl_lock; struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ u32 max_slots; /* # slots in table */ + u32 max_slotid; /* Max allowed slotid value */ u32 highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ u32 target_highest_slotid; /* Server max_slot target */ -- cgit v1.2.3 From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 19:49:20 -0500 Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly Instead of an array of slots, use a singly linked list of slots that can be dynamically appended to or shrunk. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4proc.c | 174 +++++++++++++++++++++++++++++++----------------- fs/nfs/nfs4state.c | 22 ++---- include/linux/nfs_xdr.h | 1 + 4 files changed, 121 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5d4e82b10c3c..856bc496a210 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,10 +258,10 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); -extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, - u32 max_slots, gfp_t gfp_flags); extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); +extern int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1792ece8b53c..fc65300172e1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -396,6 +396,27 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp #if defined(CONFIG_NFS_V4_1) +/* + * nfs4_shrink_slot_table - free retired slots from the slot table + */ +static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) +{ + struct nfs4_slot **p; + if (newsize >= tbl->max_slots) + return; + + p = &tbl->slots; + while (newsize--) + p = &(*p)->next; + while (*p) { + struct nfs4_slot *slot = *p; + + *p = slot->next; + kfree(slot); + tbl->max_slots--; + } +} + /* * nfs4_free_slot - free a slot and efficiently update slot table. * @@ -499,7 +520,7 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->target_highest_slotid = target_highest_slotid; tbl->generation++; - max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid); + max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid); for (i = tbl->max_slotid + 1; i <= max_slotid; i++) rpc_wake_up_next(&tbl->slot_tbl_waitq); tbl->max_slotid = max_slotid; @@ -516,16 +537,12 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, u32 highest_slotid) { - unsigned int max_slotid, i; - if (tbl->server_highest_slotid == highest_slotid) return; if (tbl->highest_used_slotid > highest_slotid) return; - max_slotid = min(tbl->max_slots - 1, highest_slotid); - /* Reset the seq_nr for deallocated slots */ - for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++) - tbl->slots[i].seq_nr = 1; + /* Deallocate slots */ + nfs4_shrink_slot_table(tbl, highest_slotid + 1); tbl->server_highest_slotid = highest_slotid; } @@ -612,6 +629,42 @@ static int nfs4_sequence_done(struct rpc_task *task, return nfs41_sequence_done(task, res); } +static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot *slot; + + slot = kzalloc(sizeof(*slot), gfp_mask); + if (slot) { + slot->table = tbl; + slot->slot_nr = slotid; + slot->seq_nr = seq_init; + } + return slot; +} + +static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot **p, *slot; + + p = &tbl->slots; + for (;;) { + if (*p == NULL) { + *p = nfs4_new_slot(tbl, tbl->max_slots, + seq_init, gfp_mask); + if (*p == NULL) + break; + tbl->max_slots++; + } + slot = *p; + if (slot->slot_nr == slotid) + return slot; + p = &slot->next; + } + return NULL; +} + /* * nfs4_alloc_slot - efficiently look for a free slot * @@ -628,15 +681,17 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, - tbl->max_slots); + tbl->max_slotid + 1); slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); if (slotid > tbl->max_slotid) goto out; + ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + if (ret == NULL) + goto out; __set_bit(slotid, tbl->used_slots); if (slotid > tbl->highest_used_slotid || tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; - ret = &tbl->slots[slotid]; ret->renewal_time = jiffies; ret->generation = tbl->generation; @@ -5718,67 +5773,56 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, - u32 max_slots, gfp_t gfp_flags) +static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) { - struct nfs4_slot *tbl; - u32 i; - - tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); - if (tbl != NULL) { - for (i = 0; i < max_slots; i++) { - tbl[i].table = table; - tbl[i].slot_nr = i; - } - } - return tbl; + if (max_reqs <= tbl->max_slots) + return 0; + if (nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS)) + return 0; + return -ENOMEM; } -static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, - struct nfs4_slot *new, - u32 max_slots, +static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, + u32 server_highest_slotid, u32 ivalue) { - struct nfs4_slot *old = NULL; - u32 i; + struct nfs4_slot **p; - spin_lock(&tbl->slot_tbl_lock); - if (new) { - old = tbl->slots; - tbl->slots = new; - tbl->max_slots = max_slots; + nfs4_shrink_slot_table(tbl, server_highest_slotid + 1); + p = &tbl->slots; + while (*p) { + (*p)->seq_nr = ivalue; + p = &(*p)->next; } tbl->highest_used_slotid = NFS4_NO_SLOT; - tbl->target_highest_slotid = max_slots - 1; - tbl->server_highest_slotid = max_slots - 1; - tbl->max_slotid = max_slots - 1; - for (i = 0; i < tbl->max_slots; i++) - tbl->slots[i].seq_nr = ivalue; - spin_unlock(&tbl->slot_tbl_lock); - kfree(old); + tbl->target_highest_slotid = server_highest_slotid; + tbl->server_highest_slotid = server_highest_slotid; + tbl->max_slotid = server_highest_slotid; } /* * (re)Initialise a slot table */ -static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, - u32 ivalue) +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) { - struct nfs4_slot *new = NULL; - int ret = -ENOMEM; + int ret; dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, max_reqs, tbl->max_slots); - /* Does the newly negotiated max_reqs match the existing slot table? */ - if (max_reqs != tbl->max_slots) { - new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS); - if (!new) - goto out; - } - ret = 0; + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = NFS4_MAX_SLOT_TABLE; + + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + goto out; + + spin_lock(&tbl->slot_tbl_lock); + nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); + spin_unlock(&tbl->slot_tbl_lock); - nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); out: @@ -5786,18 +5830,28 @@ out: return ret; } +int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + int ret; + + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = NFS4_MAX_SLOT_TABLE; + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + return ret; + spin_lock(&tbl->slot_tbl_lock); + nfs4_shrink_slot_table(tbl, max_reqs); + tbl->max_slotid = max_reqs - 1; + spin_unlock(&tbl->slot_tbl_lock); + return 0; +} + /* Destroy the slot table */ static void nfs4_destroy_slot_tables(struct nfs4_session *session) { - if (session->fc_slot_table.slots != NULL) { - kfree(session->fc_slot_table.slots); - session->fc_slot_table.slots = NULL; - } - if (session->bc_slot_table.slots != NULL) { - kfree(session->bc_slot_table.slots); - session->bc_slot_table.slots = NULL; - } - return; + nfs4_shrink_slot_table(&session->fc_slot_table, 0); + nfs4_shrink_slot_table(&session->bc_slot_table, 0); } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1b7fa73c9436..c14b2c7ac8a7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2025,29 +2025,15 @@ out: static int nfs4_recall_slot(struct nfs_client *clp) { struct nfs4_slot_table *fc_tbl; - struct nfs4_slot *new, *old; - int i; + u32 new_size; if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS); - if (!new) - return -ENOMEM; - spin_lock(&fc_tbl->slot_tbl_lock); - for (i = 0; i <= fc_tbl->target_highest_slotid; i++) - new[i].seq_nr = fc_tbl->slots[i].seq_nr; - old = fc_tbl->slots; - fc_tbl->slots = new; - fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; - fc_tbl->max_slotid = fc_tbl->target_highest_slotid; - clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; - spin_unlock(&fc_tbl->slot_tbl_lock); - - kfree(old); - return 0; + fc_tbl = &clp->cl_session->fc_slot_table; + new_size = fc_tbl->server_highest_slotid + 1; + return nfs4_resize_slot_table(fc_tbl, new_size, 1); } static int nfs4_bind_conn_to_session(struct nfs_client *clp) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ddb08fba935..44d256f6021c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,6 +188,7 @@ struct nfs4_channel_attrs { /* nfs41 sessions slot seqid */ struct nfs4_slot { struct nfs4_slot_table *table; + struct nfs4_slot *next; unsigned long generation; unsigned long renewal_time; u32 slot_nr; -- cgit v1.2.3 From c34309a45ea491e5f0c0d0af49ccfa018ff35fc1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 14:33:03 -0500 Subject: NFS: Remove unused function slot_idx Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 30715508fade..e707c1b69796 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -225,11 +225,6 @@ struct nfs4_slot_table { struct completion complete; }; -static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp) -{ - return sp - tbl->slots; -} - /* * Session related parameters */ -- cgit v1.2.3 From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 14:20:49 -0500 Subject: NFSv4.1: Move slot table and session struct definitions to nfs4session.h Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + fs/nfs/callback_xdr.c | 1 + fs/nfs/internal.h | 21 -------- fs/nfs/nfs4_fs.h | 12 ----- fs/nfs/nfs4filelayout.c | 1 + fs/nfs/nfs4session.h | 101 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4state.c | 1 + fs/nfs/nfs4xdr.c | 1 + fs/nfs/super.c | 1 + include/linux/nfs_fs_sb.h | 49 ------------------- include/linux/nfs_xdr.h | 11 +---- 11 files changed, 108 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index f1027b06a1a9..4fa788c93f46 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -40,6 +40,7 @@ #include #include "../pnfs.h" +#include "../nfs4session.h" #include "../internal.h" #include "blocklayout.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index ea6a7b190e6b..59461c957d9d 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -16,6 +16,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "internal.h" +#include "nfs4session.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8965a998b306..9bdbfc3884a9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -18,27 +18,6 @@ struct nfs_string; */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -/* - * Determine if sessions are in use. - */ -static inline int nfs4_has_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (clp->cl_session) - return 1; -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - -static inline int nfs4_has_persistent_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) - return (clp->cl_session->flags & SESSION4_PERSIST); -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cd3e3096b60a..322bd0168ebf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -29,11 +29,6 @@ enum nfs4_client_state { NFS4CLNT_BIND_CONN_TO_SESSION, }; -enum nfs4_session_state { - NFS4_SESSION_INITING, - NFS4_SESSION_DRAINING, -}; - #define NFS4_RENEW_TIMEOUT 0x01 #define NFS4_RENEW_DELEGATION_CB 0x02 @@ -327,13 +322,6 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); -extern void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl); - -static inline bool nfs4_session_draining(struct nfs4_session *session) -{ - return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); -} #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index bfb28fa38e74..591a1a7f8f94 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -35,6 +35,7 @@ #include +#include "nfs4session.h" #include "internal.h" #include "delegation.h" #include "nfs4filelayout.h" diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index cb47b1eb0886..e96323ff1d95 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -7,6 +7,68 @@ #ifndef __LINUX_FS_NFS_NFS4SESSION_H #define __LINUX_FS_NFS_NFS4SESSION_H +/* maximum number of slots to use */ +#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_MAX_SLOT_TABLE (256U) +#define NFS4_NO_SLOT ((u32)-1) + +#if IS_ENABLED(CONFIG_NFS_V4) + +/* Sessions slot seqid */ +struct nfs4_slot { + struct nfs4_slot_table *table; + struct nfs4_slot *next; + unsigned long generation; + unsigned long renewal_time; + u32 slot_nr; + u32 seq_nr; +}; + +/* Sessions */ +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +struct nfs4_slot_table { + struct nfs4_session *session; /* Parent session */ + struct nfs4_slot *slots; /* seqid per slot */ + unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ + spinlock_t slot_tbl_lock; + struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ + u32 max_slots; /* # slots in table */ + u32 max_slotid; /* Max allowed slotid value */ + u32 highest_used_slotid; /* sent to server on each SEQ. + * op for dynamic resizing */ + u32 target_highest_slotid; /* Server max_slot target */ + u32 server_highest_slotid; /* Server highest slotid */ + unsigned long generation; /* Generation counter for + target_highest_slotid */ + struct completion complete; +}; + +/* + * Session related parameters + */ +struct nfs4_session { + struct nfs4_sessionid sess_id; + u32 flags; + unsigned long session_state; + u32 hash_alg; + u32 ssv_len; + + /* The fore and back channel */ + struct nfs4_channel_attrs fc_attrs; + struct nfs4_slot_table fc_slot_table; + struct nfs4_channel_attrs bc_attrs; + struct nfs4_slot_table bc_slot_table; + struct nfs_client *clp; + /* Create session arguments */ + unsigned int fc_target_max_rqst_sz; + unsigned int fc_target_max_resp_sz; +}; + +enum nfs4_session_state { + NFS4_SESSION_INITING, + NFS4_SESSION_DRAINING, +}; + #if defined(CONFIG_NFS_V4_1) extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); @@ -24,6 +86,31 @@ extern void nfs4_destroy_session(struct nfs4_session *session); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); +extern void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl); + +static inline bool nfs4_session_draining(struct nfs4_session *session) +{ + return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); +} + +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + if (clp->cl_session) + return 1; + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) + return (clp->cl_session->flags & SESSION4_PERSIST); + return 0; +} + #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_server *server) @@ -31,5 +118,19 @@ static inline int nfs4_init_session(struct nfs_server *server) return 0; } +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + return 0; +} + #endif /* defined(CONFIG_NFS_V4_1) */ +#endif /* IS_ENABLED(CONFIG_NFS_V4) */ #endif /* __LINUX_FS_NFS_NFS4SESSION_H */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1077b9698381..1402283d152d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -57,6 +57,7 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a67040f51597..e786dc7582b1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -56,6 +56,7 @@ #include "nfs4_fs.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 652d3f7176a9..e12cea4b36a5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -64,6 +64,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "nfs4session.h" #include "pnfs.h" #include "nfs.h" diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e707c1b69796..6c6ed153a9b4 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -198,53 +198,4 @@ struct nfs_server { #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) - -/* maximum number of slots to use */ -#define NFS4_DEF_SLOT_TABLE_SIZE (16U) -#define NFS4_MAX_SLOT_TABLE (256U) -#define NFS4_NO_SLOT ((u32)-1) - -#if IS_ENABLED(CONFIG_NFS_V4) - -/* Sessions */ -#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) -struct nfs4_slot_table { - struct nfs4_session *session; /* Parent session */ - struct nfs4_slot *slots; /* seqid per slot */ - unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ - spinlock_t slot_tbl_lock; - struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ - u32 max_slots; /* # slots in table */ - u32 max_slotid; /* Max allowed slotid value */ - u32 highest_used_slotid; /* sent to server on each SEQ. - * op for dynamic resizing */ - u32 target_highest_slotid; /* Server max_slot target */ - u32 server_highest_slotid; /* Server highest slotid */ - unsigned long generation; /* Generation counter for - target_highest_slotid */ - struct completion complete; -}; - -/* - * Session related parameters - */ -struct nfs4_session { - struct nfs4_sessionid sess_id; - u32 flags; - unsigned long session_state; - u32 hash_alg; - u32 ssv_len; - - /* The fore and back channel */ - struct nfs4_channel_attrs fc_attrs; - struct nfs4_slot_table fc_slot_table; - struct nfs4_channel_attrs bc_attrs; - struct nfs4_slot_table bc_slot_table; - struct nfs_client *clp; - /* Create session arguments */ - unsigned int fc_target_max_rqst_sz; - unsigned int fc_target_max_resp_sz; -}; - -#endif /* CONFIG_NFS_V4 */ #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 44d256f6021c..2076149db1a4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -185,16 +185,7 @@ struct nfs4_channel_attrs { u32 max_reqs; }; -/* nfs41 sessions slot seqid */ -struct nfs4_slot { - struct nfs4_slot_table *table; - struct nfs4_slot *next; - unsigned long generation; - unsigned long renewal_time; - u32 slot_nr; - u32 seq_nr; -}; - +struct nfs4_slot; struct nfs4_sequence_args { struct nfs4_slot *sa_slot; u8 sa_cache_this; -- cgit v1.2.3 From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 19:02:20 -0400 Subject: NFSv4: Clean up handling of privileged operations Privileged rpc calls are those that are run by the state recovery thread, in cases where we're trying to recover the system after a server reboot or a network partition. In those cases, we want to fence off all other rpc calls (see nfs4_begin_drain_session()) so that they don't end up using stateids or clientids that are in the process of being recovered. Prior to this patch, we had to set up special callback functions in order to declare an rpc call as being privileged. By adding a new field to the sequence arguments, this patch simplifies things considerably, and allows us to declare the rpc call as privileged before it is run. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 114 ++++++++++++++++++------------------------------ include/linux/nfs_xdr.h | 3 +- 2 files changed, 44 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b1635ce658d..38a709d78594 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -490,11 +490,17 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args, { args->sa_slot = NULL; args->sa_cache_this = 0; + args->sa_privileged = 0; if (cache_reply) args->sa_cache_this = 1; res->sr_slot = NULL; } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ + args->sa_privileged = 1; +} + int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -514,7 +520,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && - !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { + !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ dprintk("%s session is draining\n", __func__); goto out_sleep; @@ -548,6 +554,9 @@ out_success: rpc_call_start(task); return 0; out_sleep: + /* Privileged tasks are queued with top priority */ + if (args->sa_privileged) + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; @@ -593,12 +602,6 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); } -static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs41_call_sync_prepare(task, calldata); -} - static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) { struct nfs41_call_sync_data *data = calldata; @@ -611,17 +614,11 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -static const struct rpc_call_ops nfs41_call_priv_sync_ops = { - .rpc_call_prepare = nfs41_call_priv_sync_prepare, - .rpc_call_done = nfs41_call_sync_done, -}; - static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int privileged) + struct nfs4_sequence_res *res) { int ret; struct rpc_task *task; @@ -637,8 +634,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; - if (privileged) - task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); if (IS_ERR(task)) ret = PTR_ERR(task); @@ -656,16 +651,21 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res) { - return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); + return nfs4_call_sync_sequence(clnt, server, msg, args, res); } #else -static inline +static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ +} + + static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -1475,13 +1475,6 @@ unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; - -} - -static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_open_prepare(task, calldata); } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -1542,12 +1535,6 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static const struct rpc_call_ops nfs4_recover_open_ops = { - .rpc_call_prepare = nfs4_recover_open_prepare, - .rpc_call_done = nfs4_open_done, - .rpc_release = nfs4_open_release, -}; - static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) { struct inode *dir = data->dir->d_inode; @@ -1577,7 +1564,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->rpc_status = 0; data->cancelled = 0; if (isrecover) - task_setup_data.callback_ops = &nfs4_recover_open_ops; + nfs4_set_sequence_privileged(&o_arg->seq_args); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -4558,8 +4545,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) return; /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { - if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) + if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; + } data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4574,13 +4562,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); - dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); -} - -static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_lock_prepare(task, calldata); + dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) @@ -4635,12 +4617,6 @@ static const struct rpc_call_ops nfs4_lock_ops = { .rpc_release = nfs4_lock_release, }; -static const struct rpc_call_ops nfs4_recover_lock_ops = { - .rpc_call_prepare = nfs4_recover_lock_prepare, - .rpc_call_done = nfs4_lock_done, - .rpc_release = nfs4_lock_release, -}; - static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) { switch (error) { @@ -4683,15 +4659,15 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - if (recovery_type > NFS_LOCK_NEW) { - if (recovery_type == NFS_LOCK_RECLAIM) - data->arg.reclaim = NFS_LOCK_RECLAIM; - task_setup_data.callback_ops = &nfs4_recover_lock_ops; - } nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; + if (recovery_type > NFS_LOCK_NEW) { + if (recovery_type == NFS_LOCK_RECLAIM) + data->arg.reclaim = NFS_LOCK_RECLAIM; + nfs4_set_sequence_privileged(&data->arg.seq_args); + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5432,7 +5408,6 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, (struct nfs4_get_lease_time_data *)calldata; dprintk("--> %s\n", __func__); - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); /* just setup sequence, do not trigger session recovery since we're invoked within one */ nfs41_setup_sequence(data->clp->cl_session, @@ -5500,6 +5475,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) int status; nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); + nfs4_set_sequence_privileged(&args.la_seq_args); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5775,26 +5751,15 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) nfs41_setup_sequence(clp->cl_session, args, res, task); } -static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs41_sequence_prepare(task, data); -} - static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, .rpc_release = nfs41_sequence_release, }; -static const struct rpc_call_ops nfs41_sequence_privileged_ops = { - .rpc_call_done = nfs41_sequence_call_done, - .rpc_call_prepare = nfs41_sequence_prepare_privileged, - .rpc_release = nfs41_sequence_release, -}; - -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, - const struct rpc_call_ops *seq_ops) +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + bool is_privileged) { struct nfs4_sequence_data *calldata; struct rpc_message msg = { @@ -5804,7 +5769,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ struct rpc_task_setup task_setup_data = { .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, - .callback_ops = seq_ops, + .callback_ops = &nfs41_sequence_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, }; @@ -5816,6 +5781,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return ERR_PTR(-ENOMEM); } nfs41_init_sequence(&calldata->args, &calldata->res, 0); + if (is_privileged) + nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5831,7 +5798,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return 0; - task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops); + task = _nfs41_proc_sequence(clp, cred, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -5845,7 +5812,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops); + task = _nfs41_proc_sequence(clp, cred, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; @@ -5874,7 +5841,6 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); nfs41_setup_sequence(calldata->clp->cl_session, &calldata->arg.seq_args, &calldata->res.seq_res, @@ -5955,6 +5921,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->arg.one_fs = 0; nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); + nfs4_set_sequence_privileged(&calldata->arg.seq_args); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -6521,7 +6488,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) dprintk("NFS call test_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res); if (status != NFS_OK) { dprintk("NFS reply test_stateid: failed, %d\n", status); return status; @@ -6568,8 +6537,9 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) dprintk("NFS call free_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); status = nfs4_call_sync_sequence(server->client, server, &msg, - &args.seq_args, &res.seq_res, 1); + &args.seq_args, &res.seq_res); dprintk("NFS reply free_stateid: %d\n", status); return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2076149db1a4..baa673edb597 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,7 +188,8 @@ struct nfs4_channel_attrs { struct nfs4_slot; struct nfs4_sequence_args { struct nfs4_slot *sa_slot; - u8 sa_cache_this; + u8 sa_cache_this : 1, + sa_privileged : 1; }; struct nfs4_sequence_res { -- cgit v1.2.3 From 62ae082d883d167cdaa7895cf2972d85e178228a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Nov 2012 17:10:01 -0500 Subject: NFSv4: Reorder the XDR structures to put sequence at the top, not bottom Pre-condition for optimising the slot allocation and reintroducing FIFO behaviour. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 138 ++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index baa673edb597..a55abd499c21 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -205,8 +205,8 @@ struct nfs4_get_lease_time_args { }; struct nfs4_get_lease_time_res { - struct nfs_fsinfo *lr_fsinfo; struct nfs4_sequence_res lr_seq_res; + struct nfs_fsinfo *lr_fsinfo; }; #define PNFS_LAYOUT_MAXSIZE 4096 @@ -224,23 +224,23 @@ struct pnfs_layout_range { }; struct nfs4_layoutget_args { + struct nfs4_sequence_args seq_args; __u32 type; struct pnfs_layout_range range; __u64 minlength; __u32 maxcount; struct inode *inode; struct nfs_open_context *ctx; - struct nfs4_sequence_args seq_args; nfs4_stateid stateid; struct nfs4_layoutdriver_data layout; }; struct nfs4_layoutget_res { + struct nfs4_sequence_res seq_res; __u32 return_on_close; struct pnfs_layout_range range; __u32 type; nfs4_stateid stateid; - struct nfs4_sequence_res seq_res; struct nfs4_layoutdriver_data *layoutp; }; @@ -251,38 +251,38 @@ struct nfs4_layoutget { }; struct nfs4_getdevicelist_args { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fh; u32 layoutclass; - struct nfs4_sequence_args seq_args; }; struct nfs4_getdevicelist_res { - struct pnfs_devicelist *devlist; struct nfs4_sequence_res seq_res; + struct pnfs_devicelist *devlist; }; struct nfs4_getdeviceinfo_args { - struct pnfs_device *pdev; struct nfs4_sequence_args seq_args; + struct pnfs_device *pdev; }; struct nfs4_getdeviceinfo_res { - struct pnfs_device *pdev; struct nfs4_sequence_res seq_res; + struct pnfs_device *pdev; }; struct nfs4_layoutcommit_args { + struct nfs4_sequence_args seq_args; nfs4_stateid stateid; __u64 lastbytewritten; struct inode *inode; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_layoutcommit_res { + struct nfs4_sequence_res seq_res; struct nfs_fattr *fattr; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; int status; }; @@ -296,11 +296,11 @@ struct nfs4_layoutcommit_data { }; struct nfs4_layoutreturn_args { + struct nfs4_sequence_args seq_args; struct pnfs_layout_hdr *layout; struct inode *inode; nfs4_stateid stateid; __u32 layout_type; - struct nfs4_sequence_args seq_args; }; struct nfs4_layoutreturn_res { @@ -326,6 +326,7 @@ struct stateowner_id { * Arguments to the open call. */ struct nfs_openargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; struct nfs_seqid * seqid; int open_flags; @@ -346,10 +347,10 @@ struct nfs_openargs { const u32 * bitmask; const u32 * open_bitmap; __u32 claim; - struct nfs4_sequence_args seq_args; }; struct nfs_openres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info cinfo; @@ -364,7 +365,6 @@ struct nfs_openres { __u32 attrset[NFS4_BITMAP_SIZE]; struct nfs4_string *owner; struct nfs4_string *group_owner; - struct nfs4_sequence_res seq_res; __u32 access_request; __u32 access_supported; __u32 access_result; @@ -388,20 +388,20 @@ struct nfs_open_confirmres { * Arguments to the close call. */ struct nfs_closeargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; nfs4_stateid * stateid; struct nfs_seqid * seqid; fmode_t fmode; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_closeres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_fattr * fattr; struct nfs_seqid * seqid; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * * Arguments to the lock,lockt, and locku call. @@ -413,6 +413,7 @@ struct nfs_lowner { }; struct nfs_lock_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * lock_seqid; @@ -423,40 +424,39 @@ struct nfs_lock_args { unsigned char block : 1; unsigned char reclaim : 1; unsigned char new_lock_owner : 1; - struct nfs4_sequence_args seq_args; }; struct nfs_lock_res { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * lock_seqid; struct nfs_seqid * open_seqid; - struct nfs4_sequence_res seq_res; }; struct nfs_locku_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * seqid; nfs4_stateid * stateid; - struct nfs4_sequence_args seq_args; }; struct nfs_locku_res { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * seqid; - struct nfs4_sequence_res seq_res; }; struct nfs_lockt_args { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct file_lock * fl; struct nfs_lowner lock_owner; - struct nfs4_sequence_args seq_args; }; struct nfs_lockt_res { - struct file_lock * denied; /* LOCK, LOCKT failed */ struct nfs4_sequence_res seq_res; + struct file_lock * denied; /* LOCK, LOCKT failed */ }; struct nfs_release_lockowner_args { @@ -464,22 +464,23 @@ struct nfs_release_lockowner_args { }; struct nfs4_delegreturnargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_delegreturnres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the read call. */ struct nfs_readargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; @@ -487,20 +488,20 @@ struct nfs_readargs { __u32 count; unsigned int pgbase; struct page ** pages; - struct nfs4_sequence_args seq_args; }; struct nfs_readres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; __u32 count; int eof; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the write call. */ struct nfs_writeargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; @@ -510,7 +511,6 @@ struct nfs_writeargs { unsigned int pgbase; struct page ** pages; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_write_verifier { @@ -523,65 +523,65 @@ struct nfs_writeverf { }; struct nfs_writeres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; struct nfs_writeverf * verf; __u32 count; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Arguments to the commit call. */ struct nfs_commitargs { + struct nfs4_sequence_args seq_args; struct nfs_fh *fh; __u64 offset; __u32 count; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_commitres { + struct nfs4_sequence_res seq_res; struct nfs_fattr *fattr; struct nfs_writeverf *verf; const struct nfs_server *server; - struct nfs4_sequence_res seq_res; }; /* * Common arguments to the unlink call */ struct nfs_removeargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *fh; struct qstr name; - struct nfs4_sequence_args seq_args; }; struct nfs_removeres { + struct nfs4_sequence_res seq_res; const struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; - struct nfs4_sequence_res seq_res; }; /* * Common arguments to the rename call */ struct nfs_renameargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh *old_dir; const struct nfs_fh *new_dir; const struct qstr *old_name; const struct qstr *new_name; - struct nfs4_sequence_args seq_args; }; struct nfs_renameres { + struct nfs4_sequence_res seq_res; const struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; struct nfs_fattr *new_fattr; - struct nfs4_sequence_res seq_res; }; /* @@ -622,20 +622,20 @@ struct nfs_createargs { }; struct nfs_setattrargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; nfs4_stateid stateid; struct iattr * iap; const struct nfs_server * server; /* Needed for name mapping */ const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs_setaclargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct nfs4_sequence_args seq_args; }; struct nfs_setaclres { @@ -643,27 +643,27 @@ struct nfs_setaclres { }; struct nfs_getaclargs { + struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; unsigned int acl_pgbase; struct page ** acl_pages; - struct nfs4_sequence_args seq_args; }; /* getxattr ACL interface flags */ #define NFS4_ACL_TRUNC 0x0001 /* ACL was truncated */ struct nfs_getaclres { + struct nfs4_sequence_res seq_res; size_t acl_len; size_t acl_data_offset; int acl_flags; struct page * acl_scratch; - struct nfs4_sequence_res seq_res; }; struct nfs_setattrres { + struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; const struct nfs_server * server; - struct nfs4_sequence_res seq_res; }; struct nfs_linkargs { @@ -828,21 +828,22 @@ struct nfs3_getaclres { typedef u64 clientid4; struct nfs4_accessargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; u32 access; - struct nfs4_sequence_args seq_args; }; struct nfs4_accessres { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; u32 supported; u32 access; - struct nfs4_sequence_res seq_res; }; struct nfs4_create_arg { + struct nfs4_sequence_args seq_args; u32 ftype; union { struct { @@ -859,88 +860,88 @@ struct nfs4_create_arg { const struct iattr * attrs; const struct nfs_fh * dir_fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_create_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fh * fh; struct nfs_fattr * fattr; struct nfs4_change_info dir_cinfo; - struct nfs4_sequence_res seq_res; }; struct nfs4_fsinfo_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_fsinfo_res { - struct nfs_fsinfo *fsinfo; struct nfs4_sequence_res seq_res; + struct nfs_fsinfo *fsinfo; }; struct nfs4_getattr_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_getattr_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; - struct nfs4_sequence_res seq_res; }; struct nfs4_link_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const struct nfs_fh * dir_fh; const struct qstr * name; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_link_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs4_change_info cinfo; struct nfs_fattr * dir_attr; - struct nfs4_sequence_res seq_res; }; struct nfs4_lookup_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * dir_fh; const struct qstr * name; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_lookup_res { + struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; struct nfs_fh * fh; - struct nfs4_sequence_res seq_res; }; struct nfs4_lookup_root_arg { - const u32 * bitmask; struct nfs4_sequence_args seq_args; + const u32 * bitmask; }; struct nfs4_pathconf_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_pathconf_res { - struct nfs_pathconf *pathconf; struct nfs4_sequence_res seq_res; + struct nfs_pathconf *pathconf; }; struct nfs4_readdir_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; u64 cookie; nfs4_verifier verifier; @@ -949,21 +950,20 @@ struct nfs4_readdir_arg { unsigned int pgbase; /* zero-copy data */ const u32 * bitmask; int plus; - struct nfs4_sequence_args seq_args; }; struct nfs4_readdir_res { + struct nfs4_sequence_res seq_res; nfs4_verifier verifier; unsigned int pgbase; - struct nfs4_sequence_res seq_res; }; struct nfs4_readlink { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; unsigned int pgbase; unsigned int pglen; /* zero-copy data */ struct page ** pages; /* zero-copy data */ - struct nfs4_sequence_args seq_args; }; struct nfs4_readlink_res { @@ -989,28 +989,28 @@ struct nfs4_setclientid_res { }; struct nfs4_statfs_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_statfs_res { - struct nfs_fsstat *fsstat; struct nfs4_sequence_res seq_res; + struct nfs_fsstat *fsstat; }; struct nfs4_server_caps_arg { - struct nfs_fh *fhandle; struct nfs4_sequence_args seq_args; + struct nfs_fh *fhandle; }; struct nfs4_server_caps_res { + struct nfs4_sequence_res seq_res; u32 attr_bitmask[3]; u32 acl_bitmask; u32 has_links; u32 has_symlinks; u32 fh_expire_type; - struct nfs4_sequence_res seq_res; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 @@ -1036,16 +1036,16 @@ struct nfs4_fs_locations { }; struct nfs4_fs_locations_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh *dir_fh; const struct qstr *name; struct page *page; const u32 *bitmask; - struct nfs4_sequence_args seq_args; }; struct nfs4_fs_locations_res { - struct nfs4_fs_locations *fs_locations; struct nfs4_sequence_res seq_res; + struct nfs4_fs_locations *fs_locations; }; struct nfs4_secinfo_oid { @@ -1070,14 +1070,14 @@ struct nfs4_secinfo_flavors { }; struct nfs4_secinfo_arg { + struct nfs4_sequence_args seq_args; const struct nfs_fh *dir_fh; const struct qstr *name; - struct nfs4_sequence_args seq_args; }; struct nfs4_secinfo_res { - struct nfs4_secinfo_flavors *flavors; struct nfs4_sequence_res seq_res; + struct nfs4_secinfo_flavors *flavors; }; #endif /* CONFIG_NFS_V4 */ @@ -1157,9 +1157,9 @@ struct nfs41_create_session_res { }; struct nfs41_reclaim_complete_args { + struct nfs4_sequence_args seq_args; /* In the future extend to include curr_fh for use with migration */ unsigned char one_fs:1; - struct nfs4_sequence_args seq_args; }; struct nfs41_reclaim_complete_res { @@ -1169,28 +1169,28 @@ struct nfs41_reclaim_complete_res { #define SECINFO_STYLE_CURRENT_FH 0 #define SECINFO_STYLE_PARENT 1 struct nfs41_secinfo_no_name_args { - int style; struct nfs4_sequence_args seq_args; + int style; }; struct nfs41_test_stateid_args { - nfs4_stateid *stateid; struct nfs4_sequence_args seq_args; + nfs4_stateid *stateid; }; struct nfs41_test_stateid_res { - unsigned int status; struct nfs4_sequence_res seq_res; + unsigned int status; }; struct nfs41_free_stateid_args { - nfs4_stateid *stateid; struct nfs4_sequence_args seq_args; + nfs4_stateid *stateid; }; struct nfs41_free_stateid_res { - unsigned int status; struct nfs4_sequence_res seq_res; + unsigned int status; }; #else -- cgit v1.2.3 From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Nov 2012 23:59:29 -0500 Subject: SUNRPC: Don't allow low priority tasks to pre-empt higher priority ones Currently, the priority queues attempt to be 'fair' to lower priority tasks by scheduling them after a certain number of higher priority tasks have run. The problem is that both the transport send queue and the NFSv4.1 session slot queue have strong ordering requirements. This patch therefore removes the fairness code in favour of strong ordering of task priorities. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 - net/sunrpc/sched.c | 44 ++++++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index dc0c3cc3ada3..b64f8eb0b973 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -192,7 +192,6 @@ struct rpc_wait_queue { pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ - unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1aefc9fef866..d17a704aaf5f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -98,6 +98,23 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } +static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) +{ + queue->priority = priority; +} + +static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) +{ + queue->owner = pid; + queue->nr = RPC_BATCH_COUNT; +} + +static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) +{ + rpc_set_waitqueue_priority(queue, queue->maxpriority); + rpc_set_waitqueue_owner(queue, 0); +} + /* * Add new request to a priority queue. */ @@ -109,9 +126,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[queue_priority]; if (unlikely(queue_priority > queue->maxpriority)) - q = &queue->tasks[queue->maxpriority]; + queue_priority = queue->maxpriority; + if (queue_priority > queue->priority) + rpc_set_waitqueue_priority(queue, queue_priority); + q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); @@ -180,24 +199,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas task->tk_pid, queue, rpc_qname(queue)); } -static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) -{ - queue->priority = priority; - queue->count = 1 << (priority * 2); -} - -static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - -static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) -{ - rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); -} - static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; @@ -464,8 +465,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q /* * Check if we need to switch queues. */ - if (--queue->count) - goto new_owner; + goto new_owner; } /* -- cgit v1.2.3 From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 3 Dec 2012 16:25:40 +0000 Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16() intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC, 48 for other platforms). By using these instead of the inline assembler that most architectures have in their __arch_swabXX() macros, we let the compiler see what's actually happening. The resulting code should be at least as good, and much *better* in the cases where it can be combined with a nearby load or store, using a load-and-byteswap or store-and-byteswap instruction (e.g. lwbrx/stwbrx on PowerPC, movbe on Atom). When GCC is sufficiently recent *and* the architecture opts in to using the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be used in preference to the __arch_swabXX() macros. An architecture which does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own hand-crafted macros. Signed-off-by: David Woodhouse Acked-by: H. Peter Anvin --- arch/Kconfig | 19 +++++++++++++++++++ include/linux/compiler-gcc4.h | 10 ++++++++++ include/linux/compiler-intel.h | 7 +++++++ include/uapi/linux/swab.h | 12 +++++++++--- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06a5185..c31416b10586 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS See Documentation/unaligned-memory-access.txt for more information on the topic of unaligned memory accesses. +config ARCH_USE_BUILTIN_BSWAP + bool + help + Modern versions of GCC (since 4.4) have builtin functions + for handling byte-swapping. Using these, instead of the old + inline assembler that the architecture code provides in the + __arch_bswapXX() macros, allows the compiler to see what's + happening and offers more opportunity for optimisation. In + particular, the compiler will be able to combine the byteswap + with a nearby load or store and use load-and-swap or + store-and-swap instructions if the architecture has them. It + should almost *never* result in code which is worse than the + hand-coded assembler in . But just in case it + does, the use of the builtins is optional. + + Any architecture with load-and-swap or store-and-swap + instructions should set this. And it shouldn't hurt to set it + on architectures that don't have such instructions. + config HAVE_SYSCALL_WRAPPERS bool diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..dc16a858e77c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -63,3 +63,13 @@ #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if __GNUC_MINOR__ >= 4 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d8e636e5607d..973ce10c40b6 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -29,3 +29,10 @@ #endif #define uninitialized_var(x) x + +#ifndef __HAVE_BUILTIN_BSWAP16__ +/* icc has this, but it's called _bswap16 */ +#define __HAVE_BUILTIN_BSWAP16__ +#define __builtin_bswap16 _bswap16 +#endif + diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index e811474724c2..0e011eb91b5d 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -45,7 +45,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) { -#ifdef __arch_swab16 +#ifdef __HAVE_BUILTIN_BSWAP16__ + return __builtin_bswap16(val); +#elif defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); @@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u32 __fswab32(__u32 val) { -#ifdef __arch_swab32 +#ifdef __HAVE_BUILTIN_BSWAP32__ + return __builtin_bswap32(val); +#elif defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); @@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u64 __fswab64(__u64 val) { -#ifdef __arch_swab64 +#ifdef __HAVE_BUILTIN_BSWAP64__ + return __builtin_bswap64(val); +#elif defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; -- cgit v1.2.3 From 3f3299d5c0268d6cc3f47b446e8aca436e4a5651 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2012 13:42:38 +0100 Subject: block: Rename queue dead flag QUEUE_FLAG_DEAD is used to indicate that queuing new requests must stop. After this flag has been set queue draining starts. However, during the queue draining phase it is still safe to invoke the queue's request_fn, so QUEUE_FLAG_DYING is a better name for this flag. This patch has been generated by running the following command over the kernel source tree: git grep -lEw 'blk_queue_dead|QUEUE_FLAG_DEAD' | xargs sed -i.tmp -e 's/blk_queue_dead/blk_queue_dying/g' \ -e 's/QUEUE_FLAG_DEAD/QUEUE_FLAG_DYING/g'; \ sed -i.tmp -e "s/QUEUE_FLAG_DYING$(printf \\t)*5/QUEUE_FLAG_DYING$(printf \\t)5/g" \ include/linux/blkdev.h; \ sed -i.tmp -e 's/ DEAD/ DYING/g' -e 's/dead queue/a dying queue/' \ -e 's/Dead queue/A dying queue/' block/blk-core.c Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Cc: James Bottomley Cc: Mike Christie Cc: Jens Axboe Cc: Chanho Min Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-core.c | 26 +++++++++++++------------- block/blk-exec.c | 2 +- block/blk-sysfs.c | 4 ++-- block/blk-throttle.c | 2 +- block/blk.h | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 4 ++-- 8 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d0b770391ad4..5dea4e8dbc55 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); + return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); return __blkg_lookup_create(blkcg, q, NULL); } EXPORT_SYMBOL_GPL(blkg_lookup_create); diff --git a/block/blk-core.c b/block/blk-core.c index ee0e5cafa859..1a95272cca50 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -473,20 +473,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown * - * Mark @q DEAD, drain all pending requests, destroy and put it. All + * Mark @q DYING, drain all pending requests, destroy and put it. All * future requests will be failed immediately with -ENODEV. */ void blk_cleanup_queue(struct request_queue *q) { spinlock_t *lock = q->queue_lock; - /* mark @q DEAD, no new request or merges will be allowed afterwards */ + /* mark @q DYING, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); - queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); + queue_flag_set_unlocked(QUEUE_FLAG_DYING, q); spin_lock_irq(lock); /* - * Dead queue is permanently in bypass mode till released. Note + * A dying queue is permanently in bypass mode till released. Note * that, unlike blk_queue_bypass_start(), we aren't performing * synchronize_rcu() after entering bypass mode to avoid the delay * as some drivers create and destroy a lot of queues while @@ -499,11 +499,11 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_NOMERGES, q); queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - queue_flag_set(QUEUE_FLAG_DEAD, q); + queue_flag_set(QUEUE_FLAG_DYING, q); spin_unlock_irq(lock); mutex_unlock(&q->sysfs_lock); - /* drain all requests queued before DEAD marking */ + /* drain all requests queued before DYING marking */ blk_drain_queue(q, true); /* @q won't process any more request, flush async actions */ @@ -716,7 +716,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue); bool blk_get_queue(struct request_queue *q) { - if (likely(!blk_queue_dead(q))) { + if (likely(!blk_queue_dying(q))) { __blk_get_queue(q); return true; } @@ -870,7 +870,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, const bool is_sync = rw_is_sync(rw_flags) != 0; int may_queue; - if (unlikely(blk_queue_dead(q))) + if (unlikely(blk_queue_dying(q))) return NULL; may_queue = elv_may_queue(q, rw_flags); @@ -1050,7 +1050,7 @@ retry: if (rq) return rq; - if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) { + if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); return NULL; } @@ -1910,7 +1910,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) return -EIO; spin_lock_irqsave(q->queue_lock, flags); - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { spin_unlock_irqrestore(q->queue_lock, flags); return -ENODEV; } @@ -2885,9 +2885,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, trace_block_unplug(q, depth, !from_schedule); /* - * Don't mess with dead queue. + * Don't mess with a dying queue. */ - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { spin_unlock(q->queue_lock); return; } @@ -2996,7 +2996,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * Short-circuit if @q is dead */ - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { __blk_end_request_all(rq, -ENODEV); continue; } diff --git a/block/blk-exec.c b/block/blk-exec.c index 8b6dc5bd4dd0..4aec98df7ba5 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -60,7 +60,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, spin_lock_irq(q->queue_lock); - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { rq->errors = -ENXIO; if (rq->end_io) rq->end_io(rq, rq->errors); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ce6204608822..788147797a79 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; mutex_lock(&q->sysfs_lock); - if (blk_queue_dead(q)) { + if (blk_queue_dying(q)) { mutex_unlock(&q->sysfs_lock); return -ENOENT; } @@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, q = container_of(kobj, struct request_queue, kobj); mutex_lock(&q->sysfs_lock); - if (blk_queue_dead(q)) { + if (blk_queue_dying(q)) { mutex_unlock(&q->sysfs_lock); return -ENOENT; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a9664fa0b609..31146225f3d0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) tg = blkg_to_tg(blkg); - else if (!blk_queue_dead(q)) + else if (!blk_queue_dying(q)) tg = td_root_tg(td); } diff --git a/block/blk.h b/block/blk.h index ca51543b248c..2218a8a78292 100644 --- a/block/blk.h +++ b/block/blk.h @@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) q->flush_queue_delayed = 1; return NULL; } - if (unlikely(blk_queue_dead(q)) || + if (unlikely(blk_queue_dying(q)) || !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) return NULL; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index da36a3a81a9e..f29a1a9b54d2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1406,7 +1406,7 @@ static int scsi_lld_busy(struct request_queue *q) struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; - if (blk_queue_dead(q)) + if (blk_queue_dying(q)) return 0; shost = sdev->host; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1756001210d2..aba8246afe72 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -437,7 +437,7 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ -#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ +#define QUEUE_FLAG_DYING 5 /* queue being torn down */ #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ @@ -521,7 +521,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) -#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) +#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From c246e80d86736312933646896c4157daf511dadc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 6 Dec 2012 14:32:01 +0100 Subject: block: Avoid that request_fn is invoked on a dead queue A block driver may start cleaning up resources needed by its request_fn as soon as blk_cleanup_queue() finished, so request_fn must not be invoked after draining finished. This is important when blk_run_queue() is invoked without any requests in progress. As an example, if blk_drain_queue() and scsi_run_queue() run in parallel, blk_drain_queue() may have finished all requests after scsi_run_queue() has taken a SCSI device off the starved list but before that last function has had a chance to run the queue. Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Mike Christie Cc: Chanho Min Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 31 +++++++++++++++++++++++++++---- block/blk-exec.c | 2 +- block/blk.h | 2 ++ include/linux/blkdev.h | 2 ++ 4 files changed, 32 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index a182b586b06a..f52d05ff5d24 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -292,6 +292,25 @@ void blk_sync_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_sync_queue); +/** + * __blk_run_queue_uncond - run a queue whether or not it has been stopped + * @q: The queue to run + * + * Description: + * Invoke request handling on a queue if there are any pending requests. + * May be used to restart request handling after a request has completed. + * This variant runs the queue whether or not the queue has been + * stopped. Must be called with the queue lock held and interrupts + * disabled. See also @blk_run_queue. + */ +inline void __blk_run_queue_uncond(struct request_queue *q) +{ + if (unlikely(blk_queue_dead(q))) + return; + + q->request_fn(q); +} + /** * __blk_run_queue - run a single device queue * @q: The queue to run @@ -305,7 +324,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - q->request_fn(q); + __blk_run_queue_uncond(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -477,8 +496,8 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown * - * Mark @q DYING, drain all pending requests, destroy and put it. All - * future requests will be failed immediately with -ENODEV. + * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and + * put it. All future requests will be failed immediately with -ENODEV. */ void blk_cleanup_queue(struct request_queue *q) { @@ -507,9 +526,13 @@ void blk_cleanup_queue(struct request_queue *q) spin_unlock_irq(lock); mutex_unlock(&q->sysfs_lock); - /* drain all requests queued before DYING marking */ + /* + * Drain all requests queued before DYING marking. Set DEAD flag to + * prevent that q->request_fn() gets invoked after draining finished. + */ spin_lock_irq(lock); __blk_drain_queue(q, true); + queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); /* @q won't process any more request, flush async actions */ diff --git a/block/blk-exec.c b/block/blk-exec.c index 4aec98df7ba5..1320e74d79b8 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __blk_run_queue(q); /* the queue is stopped so it won't be run */ if (rq->cmd_type == REQ_TYPE_PM_RESUME) - q->request_fn(q); + __blk_run_queue_uncond(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); diff --git a/block/blk.h b/block/blk.h index 2218a8a78292..47fdfdd41520 100644 --- a/block/blk.h +++ b/block/blk.h @@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); +void __blk_run_queue_uncond(struct request_queue *q); + int blk_dev_init(void); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aba8246afe72..8bc46c250ca4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -452,6 +452,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -522,6 +523,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) +#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 24faf6f604efe18236bded4303009fc252913bf0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2012 13:46:45 +0100 Subject: block: Make blk_cleanup_queue() wait until request_fn finished Some request_fn implementations, e.g. scsi_request_fn(), unlock the queue lock internally. This may result in multiple threads executing request_fn for the same queue simultaneously. Keep track of the number of active request_fn calls and make sure that blk_cleanup_queue() waits until all active request_fn invocations have finished. A block driver may start cleaning up resources needed by its request_fn as soon as blk_cleanup_queue() finished, so blk_cleanup_queue() must wait for all outstanding request_fn invocations to finish. Signed-off-by: Bart Van Assche Reported-by: Chanho Min Cc: James Bottomley Cc: Mike Christie Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++++++++++ include/linux/blkdev.h | 6 ++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9fb23537c7ad..473015ee28a2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -309,7 +309,16 @@ inline void __blk_run_queue_uncond(struct request_queue *q) if (unlikely(blk_queue_dead(q))) return; + /* + * Some request_fn implementations, e.g. scsi_request_fn(), unlock + * the queue lock internally. As a result multiple threads may be + * running such a request function concurrently. Keep track of the + * number of active request_fn invocations such that blk_drain_queue() + * can wait until all these request_fn calls have finished. + */ + q->request_fn_active++; q->request_fn(q); + q->request_fn_active--; } /** @@ -408,6 +417,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) __blk_run_queue(q); drain |= q->nr_rqs_elvpriv; + drain |= q->request_fn_active; /* * Unfortunately, requests are queued at and tracked from diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bc46c250ca4..c9d233e727f2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -378,6 +378,12 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + /* + * Number of active block driver functions for which blk_drain_queue() + * must wait. Must be incremented around functions that unlock the + * queue_lock internally, e.g. scsi_request_fn(). + */ + unsigned int request_fn_active; unsigned int rq_timeout; struct timer_list timeout; -- cgit v1.2.3 From 80729beb3326fd682543f4f4ea534df47ab48967 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2012 13:47:36 +0100 Subject: bsg: Remove unused function bsg_goose_queue() The function bsg_goose_queue() does not have any in-tree callers, so let's remove it. Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bsg-lib.c | 13 ------------- include/linux/bsg-lib.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index deee61fbb741..650f427d915b 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -151,19 +151,6 @@ failjob_rls_job: return -ENOMEM; } -/* - * bsg_goose_queue - restart queue in case it was stopped - * @q: request q to be restarted - */ -void bsg_goose_queue(struct request_queue *q) -{ - if (!q) - return; - - blk_run_queue_async(q); -} -EXPORT_SYMBOL_GPL(bsg_goose_queue); - /** * bsg_request_fn - generic handler for bsg requests * @q: request queue to manage diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 4d0fb3df2f4a..a226652a5a6c 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -67,6 +67,5 @@ void bsg_job_done(struct bsg_job *job, int result, int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, bsg_job_fn *job_fn, int dd_job_size); void bsg_request_fn(struct request_queue *q); -void bsg_goose_queue(struct request_queue *q); #endif -- cgit v1.2.3 From be2f6f5a7833b99bdee97c4b877dcd2afc6cdd00 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 9 Dec 2012 15:40:30 +0100 Subject: mfd: wm5102: Add readback of DSP status 3 register Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm5102-tables.c | 2 ++ include/linux/mfd/arizona/registers.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index 005de63d01d6..8844b2f9de87 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -2336,6 +2336,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP1_CLOCKING_1: case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: + case ARIZONA_DSP1_STATUS_3: return true; default: return false; @@ -2386,6 +2387,7 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_AOD_IRQ_RAW_STATUS: case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: + case ARIZONA_DSP1_STATUS_3: case ARIZONA_HEADPHONE_DETECT_2: case ARIZONA_MIC_DETECT_3: return true; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 7671a287dfee..81bca23c9a92 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -979,6 +979,7 @@ #define ARIZONA_DSP1_CLOCKING_1 0x1101 #define ARIZONA_DSP1_STATUS_1 0x1104 #define ARIZONA_DSP1_STATUS_2 0x1105 +#define ARIZONA_DSP1_STATUS_3 0x1106 #define ARIZONA_DSP2_CONTROL_1 0x1200 #define ARIZONA_DSP2_CLOCKING_1 0x1201 #define ARIZONA_DSP2_STATUS_1 0x1204 -- cgit v1.2.3 From dd31866b0d55c9b70722ebad6ccd643223d9269e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Nov 2012 17:06:26 +0900 Subject: f2fs: add on-disk layout This adds a header file describing the on-disk layout of f2fs. Signed-off-by: Changman Lee Signed-off-by: Chul Lee Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 410 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 include/linux/f2fs_fs.h (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 000000000000..1429ece7caab --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,410 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include +#include + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR 0x0U +#define NEW_ADDR -1U + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. + */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_MAX_NAME_LEN 256 +#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_reserved; /* reserved */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} __packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. + * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (sizeof(struct f2fs_summary)) +#define SUM_FOOTER_SIZE (sizeof(struct summary_footer)) +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_NAME_LEN 8 + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_NAME_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ -- cgit v1.2.3 From 25ca923b2a766b9c93b63777ead351137533a623 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Nov 2012 16:12:41 +0900 Subject: f2fs: fix endian conversion bugs reported by sparse This patch should resolve the bugs reported by the sparse tool. Initial reports were written by "kbuild test robot" managed by fengguang.wu. In my local machines, I've tested also by running: > make C=2 CF="-D__CHECK_ENDIAN__" Accordingly, I've found lots of warnings and bugs related to the endian conversion. And I've fixed all at this moment. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 32 +++++++++++++++++--------------- fs/f2fs/data.c | 2 +- fs/f2fs/debug.c | 2 +- fs/f2fs/dir.c | 8 ++++---- fs/f2fs/f2fs.h | 25 +++++++++++++++++++++++-- fs/f2fs/hash.c | 3 +-- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 14 +++++++------- fs/f2fs/super.c | 8 ++++---- include/linux/f2fs_fs.h | 6 +++--- 12 files changed, 65 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ab743f92ee06..7c18f8efaadc 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -268,7 +268,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blkaddr, i, j; - if (!(F2FS_CKPT(sbi)->ckpt_flags & CP_ORPHAN_PRESENT_FLAG)) + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return 0; sbi->por_doing = 1; @@ -287,7 +287,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); } /* clear Orphan Flag */ - F2FS_CKPT(sbi)->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG); + clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); sbi->por_doing = 0; return 0; } @@ -376,7 +376,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, pre_version = le64_to_cpu(cp_block->checkpoint_ver); /* Read the 2nd cp block in this CP pack */ - cp_addr += le64_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; cp_page_2 = get_meta_page(sbi, cp_addr); cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); @@ -605,8 +605,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) block_t start_blk; struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; + unsigned int crc32 = 0; void *kaddr; - __u32 crc32 = 0; int i; /* Flush all the NAT/SIT pages */ @@ -646,33 +646,35 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi); if (data_sum_blocks < 3) - ckpt->ckpt_flags |= CP_COMPACT_SUM_FLAG; + set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else - ckpt->ckpt_flags &= (~CP_COMPACT_SUM_FLAG); + clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = 1 + orphan_blocks; - ckpt->cp_pack_total_block_count = 2 + data_sum_blocks + orphan_blocks; + ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); if (is_umount) { - ckpt->ckpt_flags |= CP_UMOUNT_FLAG; - ckpt->cp_pack_total_block_count += NR_CURSEG_NODE_TYPE; + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { - ckpt->ckpt_flags &= (~CP_UMOUNT_FLAG); + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks); } if (sbi->n_orphans) - ckpt->ckpt_flags |= CP_ORPHAN_PRESENT_FLAG; + set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else - ckpt->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG); + clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); - *(__u32 *)((unsigned char *)ckpt + + *(__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset)) = cpu_to_le32(crc32); @@ -716,7 +718,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) sbi->alloc_valid_block_count = 0; /* Here, we only have one bio having CP pack */ - if (sbi->ckpt->ckpt_flags & CP_ERROR_FLAG) + if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) sbi->sb->s_flags |= MS_RDONLY; else sync_meta_pages(sbi, META_FLUSH, LONG_MAX); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c2fd0a80db16..5635cc5a9d4d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -545,7 +545,7 @@ redirty_out: #define MAX_DESIRED_PAGES_WP 4096 -int f2fs_write_data_pages(struct address_space *mapping, +static int f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a56181c1b28b..fb62960a1dc1 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -27,7 +27,7 @@ static LIST_HEAD(f2fs_stat_list); static struct dentry *debugfs_root; -void update_general_status(struct f2fs_sb_info *sbi) +static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = sbi->stat_info; int i; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 5975568d03df..5ec7a06120e1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -80,7 +80,7 @@ static bool early_match_name(const char *name, int namelen, if (le16_to_cpu(de->name_len) != namelen) return false; - if (le32_to_cpu(de->hash_code) != namehash) + if (de->hash_code != namehash) return false; return true; @@ -143,7 +143,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level); nblock = bucket_blocks(level); - bidx = dir_block_index(level, namehash % nbucket); + bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); end_block = bidx + nblock; for (; bidx < end_block; bidx++) { @@ -406,7 +406,7 @@ start: nbucket = dir_buckets(level); nblock = bucket_blocks(level); - bidx = dir_block_index(level, (dentry_hash % nbucket)); + bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { mutex_lock_op(sbi, DENTRY_OPS); @@ -437,7 +437,7 @@ add_dentry: wait_on_page_writeback(dentry_page); de = &dentry_blk->dentry[bit_pos]; - de->hash_code = cpu_to_le32(dentry_hash); + de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); memcpy(dentry_blk->filename[bit_pos], name, namelen); de->ino = cpu_to_le32(inode->i_ino); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d3f5a70e2a49..8d7fde1bda1e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -463,6 +463,26 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) sbi->s_dirty = 0; } +static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; +} + +static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags |= f; + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags &= (~f); + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) { mutex_lock_nested(&sbi->fs_lock[t], t); @@ -577,7 +597,8 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? ckpt->sit_ver_bitmap_bytesize : 0; + int offset = (flag == NAT_BITMAP) ? + le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; return &ckpt->sit_nat_version_bitmap + offset; } @@ -587,7 +608,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); - start_addr = le64_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); /* * odd numbered checkpoint should at cp segment 0 diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 098a1963d7c7..beb155e8d06d 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -92,7 +92,6 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len) hash = buf[0]; minor_hash = buf[1]; - f2fs_hash = hash; - f2fs_hash &= ~F2FS_HASH_COL_BIT; + f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); return f2fs_hash; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 216f04dc1177..5d421fe22575 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1445,8 +1445,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); dst->i.i_size = 0; - dst->i.i_blocks = 1; - dst->i.i_links = 1; + dst->i.i_blocks = cpu_to_le64(1); + dst->i.i_links = cpu_to_le32(1); dst->i.i_xattr_nid = 0; new_ni = old_ni; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5d525ed312ba..0ab92d643052 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -177,7 +177,7 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) void *kaddr = page_address(page); struct f2fs_node *rn = (struct f2fs_node *)kaddr; rn->footer.cp_ver = ckpt->checkpoint_ver; - rn->footer.next_blkaddr = blkaddr; + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } static inline nid_t ino_of_node(struct page *node_page) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7a43df0b72c1..222a7bb92214 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -81,7 +81,7 @@ static int recover_inode(struct inode *inode, struct page *node_page) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); - inode->i_mode = le32_to_cpu(raw_inode->i_mode); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_size_write(inode, le64_to_cpu(raw_inode->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed7c079cfc7f..d973c56e8bd6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -630,7 +630,7 @@ static void f2fs_end_io_write(struct bio *bio, int err) SetPageError(page); if (page->mapping) set_bit(AS_EIO, &page->mapping->flags); - p->sbi->ckpt->ckpt_flags |= CP_ERROR_FLAG; + set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); set_page_dirty(page); } end_page_writeback(page); @@ -1067,7 +1067,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); - if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); @@ -1076,7 +1076,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); - if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else @@ -1087,7 +1087,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { - if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) { + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { @@ -1119,7 +1119,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { int type = CURSEG_HOT_DATA; - if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG) { + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { /* restore for compacted data summary */ if (read_compacted_summaries(sbi)) return -EINVAL; @@ -1208,7 +1208,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG) + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); else write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); @@ -1216,7 +1216,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); return; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8661c93538af..878bf382f848 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -89,7 +89,7 @@ static void f2fs_i_callback(struct rcu_head *head) kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); } -void f2fs_destroy_inode(struct inode *inode) +static void f2fs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, f2fs_i_callback); } @@ -445,7 +445,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (sanity_check_raw_super(raw_super)) goto free_sb_buf; - sb->s_maxbytes = max_file_size(raw_super->log_blocksize); + sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); @@ -527,7 +527,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* if there are nt orphan nodes free them */ err = -EINVAL; - if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) && + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && recover_orphan_inodes(sbi)) goto free_node_inode; @@ -547,7 +547,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* recover fsynced data */ - if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) && + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && !test_opt(sbi, DISABLE_ROLL_FORWARD)) recover_fsync_data(sbi); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1429ece7caab..c2fbbc35c1e6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -272,8 +272,8 @@ struct f2fs_sit_block { * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ #define ENTRIES_IN_SUM 512 -#define SUMMARY_SIZE (sizeof(struct f2fs_summary)) -#define SUM_FOOTER_SIZE (sizeof(struct summary_footer)) +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) /* a summary entry for a 4KB-sized block in a segment */ @@ -297,7 +297,7 @@ struct summary_footer { __u32 check_sum; /* summary checksum */ } __packed; -#define SUM_JOURNAL_SIZE (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\ +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ SUM_ENTRY_SIZE) #define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ sizeof(struct nat_journal_entry)) -- cgit v1.2.3 From 457d08ee4fd91c8df17917ff2d32565e6adacbfc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Dec 2012 14:54:50 +0900 Subject: f2fs: introduce accessor to retrieve number of dentry slots Simplify code by providing the accessor macro to retrieve the number of dentry slots for a given filename length. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat --- fs/f2fs/dir.c | 13 +++++-------- include/linux/f2fs_fs.h | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fc02d8b43aea..d900c088c7c6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -99,8 +99,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, NR_DENTRY_IN_BLOCK, 0); while (bit_pos < NR_DENTRY_IN_BLOCK) { de = &dentry_blk->dentry[bit_pos]; - slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) / - F2FS_NAME_LEN; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (early_match_name(name, namelen, namehash, de)) { if (!memcmp(dentry_blk->filename[bit_pos], @@ -130,7 +129,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const char *name, int namelen, f2fs_hash_t namehash, struct page **res_page) { - int s = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN; + int s = GET_DENTRY_SLOTS(namelen); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; @@ -383,7 +382,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode) int namelen = dentry->d_name.len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; - int slots = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN; + int slots = GET_DENTRY_SLOTS(namelen); int err = 0; int i; @@ -465,8 +464,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct address_space *mapping = page->mapping; struct inode *dir = mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - int slots = (le16_to_cpu(dentry->name_len) + F2FS_NAME_LEN - 1) / - F2FS_NAME_LEN; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); void *kaddr = page_address(page); int i; @@ -641,8 +639,7 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) file->f_pos += bit_pos - start_bit_pos; goto success; } - slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) - / F2FS_NAME_LEN; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); bit_pos += slots; } bit_pos = 0; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c2fbbc35c1e6..f9a12f6243a5 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -363,6 +363,9 @@ typedef __le32 f2fs_hash_t; /* One directory entry slot covers 8bytes-long file name */ #define F2FS_NAME_LEN 8 +#define F2FS_NAME_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) /* the number of dentry in a block */ #define NR_DENTRY_IN_BLOCK 214 -- cgit v1.2.3 From b0284de05e07d56ff7de154d0c9263788755f5eb Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 30 Nov 2012 10:09:42 +0000 Subject: ab8500_bm: Rename battery management platform data to something more logical The platform specific battery management configuration data structure is currently called 'bat' short for 'battery'; however, it contains information for all components of the battery management group, rather than information pertaining to the battery itself - there are other structures for that. So, in keeping with its structure namesake 'abx500_bm_data', we rename it to 'bm' here. Using similar logic, we're also renaming 'bmdevs_of_probe' to the more device specific 'ab8500_bm_of_probe'. Signed-off-by: Lee Jones --- drivers/power/ab8500_bmdata.c | 6 +-- drivers/power/ab8500_btemp.c | 60 ++++++++++----------- drivers/power/ab8500_charger.c | 24 ++++----- drivers/power/ab8500_fg.c | 96 +++++++++++++++++----------------- drivers/power/abx500_chargalg.c | 112 ++++++++++++++++++++-------------------- include/linux/mfd/abx500.h | 6 +-- 6 files changed, 152 insertions(+), 152 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c index df5a590d760e..c2fb2c554019 100644 --- a/drivers/power/ab8500_bmdata.c +++ b/drivers/power/ab8500_bmdata.c @@ -452,9 +452,9 @@ struct abx500_bm_data ab8500_bm_data = { .fg_params = &fg, }; -int __devinit bmdevs_of_probe(struct device *dev, - struct device_node *np, - struct abx500_bm_data **battery) +int __devinit ab8500_bm_of_probe(struct device *dev, + struct device_node *np, + struct abx500_bm_data **battery) { struct batres_vs_temp *tmp_batres_tbl; struct device_node *np_bat_supply; diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index 5d1bf0bd6fa5..33ed0fccbd0e 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -78,7 +78,7 @@ struct ab8500_btemp_ranges { * @parent: Pointer to the struct ab8500 * @gpadc: Pointer to the struct gpadc * @fg: Pointer to the struct fg - * @bat: Pointer to the abx500_bm platform data + * @bm: Platform specific battery management information * @btemp_psy: Structure for BTEMP specific battery properties * @events: Structure for information about events triggered * @btemp_ranges: Battery temperature range structure @@ -95,7 +95,7 @@ struct ab8500_btemp { struct ab8500 *parent; struct ab8500_gpadc *gpadc; struct ab8500_fg *fg; - struct abx500_bm_data *bat; + struct abx500_bm_data *bm; struct power_supply btemp_psy; struct ab8500_btemp_events events; struct ab8500_btemp_ranges btemp_ranges; @@ -149,13 +149,13 @@ static int ab8500_btemp_batctrl_volt_to_res(struct ab8500_btemp *di, return (450000 * (v_batctrl)) / (1800 - v_batctrl); } - if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL) { + if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL) { /* * If the battery has internal NTC, we use the current * source to calculate the resistance, 7uA or 20uA */ rbs = (v_batctrl * 1000 - - di->bat->gnd_lift_resistance * inst_curr) + - di->bm->gnd_lift_resistance * inst_curr) / di->curr_source; } else { /* @@ -211,7 +211,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di, return 0; /* Only do this for batteries with internal NTC */ - if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) { + if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) { if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA) curr = BAT_CTRL_7U_ENA; else @@ -243,7 +243,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di, __func__); goto disable_curr_source; } - } else if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) { + } else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) { dev_dbg(di->dev, "Disable BATCTRL curr source\n"); /* Write 0 to the curr bits */ @@ -459,9 +459,9 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di) int rbat, rntc, vntc; u8 id; - id = di->bat->batt_id; + id = di->bm->batt_id; - if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && + if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && id != BATTERY_UNKNOWN) { rbat = ab8500_btemp_get_batctrl_res(di); @@ -476,8 +476,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di) } temp = ab8500_btemp_res_to_temp(di, - di->bat->bat_type[id].r_to_t_tbl, - di->bat->bat_type[id].n_temp_tbl_elements, rbat); + di->bm->bat_type[id].r_to_t_tbl, + di->bm->bat_type[id].n_temp_tbl_elements, rbat); } else { vntc = ab8500_gpadc_convert(di->gpadc, BTEMP_BALL); if (vntc < 0) { @@ -493,8 +493,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di) rntc = 230000 * vntc / (VTVOUT_V - vntc); temp = ab8500_btemp_res_to_temp(di, - di->bat->bat_type[id].r_to_t_tbl, - di->bat->bat_type[id].n_temp_tbl_elements, rntc); + di->bm->bat_type[id].r_to_t_tbl, + di->bm->bat_type[id].n_temp_tbl_elements, rntc); prev = temp; } dev_dbg(di->dev, "Battery temperature is %d\n", temp); @@ -515,7 +515,7 @@ static int ab8500_btemp_id(struct ab8500_btemp *di) u8 i; di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA; - di->bat->batt_id = BATTERY_UNKNOWN; + di->bm->batt_id = BATTERY_UNKNOWN; res = ab8500_btemp_get_batctrl_res(di); if (res < 0) { @@ -524,23 +524,23 @@ static int ab8500_btemp_id(struct ab8500_btemp *di) } /* BATTERY_UNKNOWN is defined on position 0, skip it! */ - for (i = BATTERY_UNKNOWN + 1; i < di->bat->n_btypes; i++) { - if ((res <= di->bat->bat_type[i].resis_high) && - (res >= di->bat->bat_type[i].resis_low)) { + for (i = BATTERY_UNKNOWN + 1; i < di->bm->n_btypes; i++) { + if ((res <= di->bm->bat_type[i].resis_high) && + (res >= di->bm->bat_type[i].resis_low)) { dev_dbg(di->dev, "Battery detected on %s" " low %d < res %d < high: %d" " index: %d\n", - di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL ? + di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL ? "BATCTRL" : "BATTEMP", - di->bat->bat_type[i].resis_low, res, - di->bat->bat_type[i].resis_high, i); + di->bm->bat_type[i].resis_low, res, + di->bm->bat_type[i].resis_high, i); - di->bat->batt_id = i; + di->bm->batt_id = i; break; } } - if (di->bat->batt_id == BATTERY_UNKNOWN) { + if (di->bm->batt_id == BATTERY_UNKNOWN) { dev_warn(di->dev, "Battery identified as unknown" ", resistance %d Ohm\n", res); return -ENXIO; @@ -550,13 +550,13 @@ static int ab8500_btemp_id(struct ab8500_btemp *di) * We only have to change current source if the * detected type is Type 1, else we use the 7uA source */ - if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && - di->bat->batt_id == 1) { + if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && + di->bm->batt_id == 1) { dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n"); di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA; } - return di->bat->batt_id; + return di->bm->batt_id; } /** @@ -586,9 +586,9 @@ static void ab8500_btemp_periodic_work(struct work_struct *work) } if (di->events.ac_conn || di->events.usb_conn) - interval = di->bat->temp_interval_chg; + interval = di->bm->temp_interval_chg; else - interval = di->bat->temp_interval_nochg; + interval = di->bm->temp_interval_nochg; /* Schedule a new measurement */ queue_delayed_work(di->btemp_wq, @@ -815,7 +815,7 @@ static int ab8500_btemp_get_property(struct power_supply *psy, val->intval = 1; break; case POWER_SUPPLY_PROP_TECHNOLOGY: - val->intval = di->bat->bat_type[di->bat->batt_id].name; + val->intval = di->bm->bat_type[di->bm->batt_id].name; break; case POWER_SUPPLY_PROP_TEMP: val->intval = ab8500_btemp_get_temp(di); @@ -985,10 +985,10 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s no mem for ab8500_btemp\n", __func__); return -ENOMEM; } - di->bat = pdev->mfd_cell->platform_data; - if (!di->bat) { + di->bm = pdev->mfd_cell->platform_data; + if (!di->bm) { if (np) { - ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index d27dd7fec163..21dc8422778d 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -189,7 +189,7 @@ struct ab8500_charger_usb_state { * @autopower_cfg platform specific power config support for "pwron after pwrloss" * @parent: Pointer to the struct ab8500 * @gpadc: Pointer to the struct gpadc - * @bat: Pointer to the abx500_bm platform data + * @bm: Platform specific battery management information * @flags: Structure for information about events triggered * @usb_state: Structure for usb stack information * @ac_chg: AC charger power supply @@ -226,7 +226,7 @@ struct ab8500_charger { bool autopower_cfg; struct ab8500 *parent; struct ab8500_gpadc *gpadc; - struct abx500_bm_data *bat; + struct abx500_bm_data *bm; struct ab8500_charger_event_flags flags; struct ab8500_charger_usb_state usb_state; struct ux500_charger ac_chg; @@ -1034,7 +1034,7 @@ static int ab8500_charger_set_vbus_in_curr(struct ab8500_charger *di, int min_value; /* We should always use to lowest current limit */ - min_value = min(di->bat->chg_params->usb_curr_max, ich_in); + min_value = min(di->bm->chg_params->usb_curr_max, ich_in); switch (min_value) { case 100: @@ -1176,7 +1176,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, volt_index = ab8500_voltage_to_regval(vset); curr_index = ab8500_current_to_regval(iset); input_curr_index = ab8500_current_to_regval( - di->bat->chg_params->ac_curr_max); + di->bm->chg_params->ac_curr_max); if (volt_index < 0 || curr_index < 0 || input_curr_index < 0) { dev_err(di->dev, "Charger voltage or current too high, " @@ -1193,7 +1193,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, } /* MainChInputCurr: current that can be drawn from the charger*/ ret = ab8500_charger_set_main_in_curr(di, - di->bat->chg_params->ac_curr_max); + di->bm->chg_params->ac_curr_max); if (ret) { dev_err(di->dev, "%s Failed to set MainChInputCurr\n", __func__); @@ -1209,7 +1209,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, } /* Check if VBAT overshoot control should be enabled */ - if (!di->bat->enable_overshoot) + if (!di->bm->enable_overshoot) overshoot = MAIN_CH_NO_OVERSHOOT_ENA_N; /* Enable Main Charger */ @@ -1376,7 +1376,7 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, return ret; } /* Check if VBAT overshoot control should be enabled */ - if (!di->bat->enable_overshoot) + if (!di->bm->enable_overshoot) overshoot = USB_CHG_NO_OVERSHOOT_ENA_N; /* Enable USB Charger */ @@ -2454,8 +2454,8 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di) ret = abx500_set_register_interruptible(di->dev, AB8500_RTC, AB8500_RTC_BACKUP_CHG_REG, - di->bat->bkup_bat_v | - di->bat->bkup_bat_i); + di->bm->bkup_bat_v | + di->bm->bkup_bat_i); if (ret) { dev_err(di->dev, "failed to setup backup battery charging\n"); goto out; @@ -2644,10 +2644,10 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s no mem for ab8500_charger\n", __func__); return -ENOMEM; } - di->bat = pdev->mfd_cell->platform_data; - if (!di->bat) { + di->bm = pdev->mfd_cell->platform_data; + if (!di->bm) { if (np) { - ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 5a9f58d4c0fb..4cf231375de3 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -173,7 +173,7 @@ struct inst_curr_result_list { * @avg_cap: Average capacity filter * @parent: Pointer to the struct ab8500 * @gpadc: Pointer to the struct gpadc - * @bat: Pointer to the abx500_bm platform data + * @bm: Platform specific battery management information * @fg_psy: Structure that holds the FG specific battery properties * @fg_wq: Work queue for running the FG algorithm * @fg_periodic_work: Work to run the FG algorithm periodically @@ -212,7 +212,7 @@ struct ab8500_fg { struct ab8500_fg_avg_cap avg_cap; struct ab8500 *parent; struct ab8500_gpadc *gpadc; - struct abx500_bm_data *bat; + struct abx500_bm_data *bm; struct power_supply fg_psy; struct workqueue_struct *fg_wq; struct delayed_work fg_periodic_work; @@ -355,7 +355,7 @@ static int ab8500_fg_is_low_curr(struct ab8500_fg *di, int curr) /* * We want to know if we're in low current mode */ - if (curr > -di->bat->fg_params->high_curr_threshold) + if (curr > -di->bm->fg_params->high_curr_threshold) return true; else return false; @@ -648,7 +648,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res) * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm */ val = (val * QLSB_NANO_AMP_HOURS_X10 * 36 * 4) / - (1000 * di->bat->fg_res); + (1000 * di->bm->fg_res); if (di->turn_off_fg) { dev_dbg(di->dev, "%s Disable FG\n", __func__); @@ -751,7 +751,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work) * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm */ di->accu_charge = (val * QLSB_NANO_AMP_HOURS_X10) / - (100 * di->bat->fg_res); + (100 * di->bm->fg_res); /* * Convert to unit value in mA @@ -763,7 +763,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work) * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm */ di->avg_curr = (val * QLSB_NANO_AMP_HOURS_X10 * 36) / - (1000 * di->bat->fg_res * (di->fg_samples / 4)); + (1000 * di->bm->fg_res * (di->fg_samples / 4)); di->flags.conv_done = true; @@ -815,8 +815,8 @@ static int ab8500_fg_volt_to_capacity(struct ab8500_fg *di, int voltage) struct abx500_v_to_cap *tbl; int cap = 0; - tbl = di->bat->bat_type[di->bat->batt_id].v_to_cap_tbl, - tbl_size = di->bat->bat_type[di->bat->batt_id].n_v_cap_tbl_elements; + tbl = di->bm->bat_type[di->bm->batt_id].v_to_cap_tbl, + tbl_size = di->bm->bat_type[di->bm->batt_id].n_v_cap_tbl_elements; for (i = 0; i < tbl_size; ++i) { if (voltage > tbl[i].voltage) @@ -867,8 +867,8 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di) struct batres_vs_temp *tbl; int resist = 0; - tbl = di->bat->bat_type[di->bat->batt_id].batres_tbl; - tbl_size = di->bat->bat_type[di->bat->batt_id].n_batres_tbl_elements; + tbl = di->bm->bat_type[di->bm->batt_id].batres_tbl; + tbl_size = di->bm->bat_type[di->bm->batt_id].n_batres_tbl_elements; for (i = 0; i < tbl_size; ++i) { if (di->bat_temp / 10 > tbl[i].temp) @@ -889,11 +889,11 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di) dev_dbg(di->dev, "%s Temp: %d battery internal resistance: %d" " fg resistance %d, total: %d (mOhm)\n", - __func__, di->bat_temp, resist, di->bat->fg_res / 10, - (di->bat->fg_res / 10) + resist); + __func__, di->bat_temp, resist, di->bm->fg_res / 10, + (di->bm->fg_res / 10) + resist); /* fg_res variable is in 0.1mOhm */ - resist += di->bat->fg_res / 10; + resist += di->bm->fg_res / 10; return resist; } @@ -1111,14 +1111,14 @@ static int ab8500_fg_capacity_level(struct ab8500_fg *di) percent = di->bat_cap.permille / 10; - if (percent <= di->bat->cap_levels->critical || + if (percent <= di->bm->cap_levels->critical || di->flags.low_bat) ret = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; - else if (percent <= di->bat->cap_levels->low) + else if (percent <= di->bm->cap_levels->low) ret = POWER_SUPPLY_CAPACITY_LEVEL_LOW; - else if (percent <= di->bat->cap_levels->normal) + else if (percent <= di->bm->cap_levels->normal) ret = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; - else if (percent <= di->bat->cap_levels->high) + else if (percent <= di->bm->cap_levels->high) ret = POWER_SUPPLY_CAPACITY_LEVEL_HIGH; else ret = POWER_SUPPLY_CAPACITY_LEVEL_FULL; @@ -1183,7 +1183,7 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init) di->bat_cap.prev_percent != (di->bat_cap.permille) / 10 && (di->bat_cap.permille / 10) < - di->bat->fg_params->maint_thres) { + di->bm->fg_params->maint_thres) { dev_dbg(di->dev, "battery reported full " "but capacity dropping: %d\n", @@ -1285,7 +1285,7 @@ static void ab8500_fg_algorithm_charging(struct ab8500_fg *di) switch (di->charge_state) { case AB8500_FG_CHARGE_INIT: di->fg_samples = SEC_TO_SAMPLE( - di->bat->fg_params->accu_charging); + di->bm->fg_params->accu_charging); ab8500_fg_coulomb_counter(di, true); ab8500_fg_charge_state_to(di, AB8500_FG_CHARGE_READOUT); @@ -1347,8 +1347,8 @@ static bool check_sysfs_capacity(struct ab8500_fg *di) cap_permille = ab8500_fg_convert_mah_to_permille(di, di->bat_cap.user_mah); - lower = di->bat_cap.permille - di->bat->fg_params->user_cap_limit * 10; - upper = di->bat_cap.permille + di->bat->fg_params->user_cap_limit * 10; + lower = di->bat_cap.permille - di->bm->fg_params->user_cap_limit * 10; + upper = di->bat_cap.permille + di->bm->fg_params->user_cap_limit * 10; if (lower < 0) lower = 0; @@ -1388,7 +1388,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) case AB8500_FG_DISCHARGE_INIT: /* We use the FG IRQ to work on */ di->init_cnt = 0; - di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer); + di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer); ab8500_fg_coulomb_counter(di, true); ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_INITMEASURING); @@ -1401,17 +1401,17 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) * samples to get an initial capacity. * Then go to READOUT */ - sleep_time = di->bat->fg_params->init_timer; + sleep_time = di->bm->fg_params->init_timer; /* Discard the first [x] seconds */ - if (di->init_cnt > di->bat->fg_params->init_discard_time) { + if (di->init_cnt > di->bm->fg_params->init_discard_time) { ab8500_fg_calc_cap_discharge_voltage(di, true); ab8500_fg_check_capacity_limits(di, true); } di->init_cnt += sleep_time; - if (di->init_cnt > di->bat->fg_params->init_total_time) + if (di->init_cnt > di->bm->fg_params->init_total_time) ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_READOUT_INIT); @@ -1426,7 +1426,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) /* Intentional fallthrough */ case AB8500_FG_DISCHARGE_RECOVERY: - sleep_time = di->bat->fg_params->recovery_sleep_timer; + sleep_time = di->bm->fg_params->recovery_sleep_timer; /* * We should check the power consumption @@ -1438,9 +1438,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) if (ab8500_fg_is_low_curr(di, di->inst_curr)) { if (di->recovery_cnt > - di->bat->fg_params->recovery_total_time) { + di->bm->fg_params->recovery_total_time) { di->fg_samples = SEC_TO_SAMPLE( - di->bat->fg_params->accu_high_curr); + di->bm->fg_params->accu_high_curr); ab8500_fg_coulomb_counter(di, true); ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_READOUT); @@ -1453,7 +1453,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) di->recovery_cnt += sleep_time; } else { di->fg_samples = SEC_TO_SAMPLE( - di->bat->fg_params->accu_high_curr); + di->bm->fg_params->accu_high_curr); ab8500_fg_coulomb_counter(di, true); ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_READOUT); @@ -1462,7 +1462,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) case AB8500_FG_DISCHARGE_READOUT_INIT: di->fg_samples = SEC_TO_SAMPLE( - di->bat->fg_params->accu_high_curr); + di->bm->fg_params->accu_high_curr); ab8500_fg_coulomb_counter(di, true); ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_READOUT); @@ -1509,9 +1509,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) } di->high_curr_cnt += - di->bat->fg_params->accu_high_curr; + di->bm->fg_params->accu_high_curr; if (di->high_curr_cnt > - di->bat->fg_params->high_curr_time) + di->bm->fg_params->high_curr_time) di->recovery_needed = true; ab8500_fg_calc_cap_discharge_fg(di); @@ -1528,7 +1528,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di) ab8500_fg_calc_cap_discharge_voltage(di, true); di->fg_samples = SEC_TO_SAMPLE( - di->bat->fg_params->accu_high_curr); + di->bm->fg_params->accu_high_curr); ab8500_fg_coulomb_counter(di, true); ab8500_fg_discharge_state_to(di, AB8500_FG_DISCHARGE_READOUT); @@ -1721,7 +1721,7 @@ static void ab8500_fg_low_bat_work(struct work_struct *work) vbat = ab8500_fg_bat_voltage(di); /* Check if LOW_BAT still fulfilled */ - if (vbat < di->bat->fg_params->lowbat_threshold) { + if (vbat < di->bm->fg_params->lowbat_threshold) { di->flags.low_bat = true; dev_warn(di->dev, "Battery voltage still LOW\n"); @@ -1779,8 +1779,8 @@ static int ab8500_fg_battok_init_hw_register(struct ab8500_fg *di) int ret; int new_val; - sel0 = di->bat->fg_params->battok_falling_th_sel0; - sel1 = di->bat->fg_params->battok_raising_th_sel1; + sel0 = di->bm->fg_params->battok_falling_th_sel0; + sel1 = di->bm->fg_params->battok_raising_th_sel1; cbp_sel0 = ab8500_fg_battok_calc(di, sel0); cbp_sel1 = ab8500_fg_battok_calc(di, sel1); @@ -1963,7 +1963,7 @@ static int ab8500_fg_get_property(struct power_supply *psy, di->bat_cap.max_mah); break; case POWER_SUPPLY_PROP_ENERGY_NOW: - if (di->flags.batt_unknown && !di->bat->chg_unknown_bat && + if (di->flags.batt_unknown && !di->bm->chg_unknown_bat && di->flags.batt_id_received) val->intval = ab8500_fg_convert_mah_to_uwh(di, di->bat_cap.max_mah); @@ -1978,21 +1978,21 @@ static int ab8500_fg_get_property(struct power_supply *psy, val->intval = di->bat_cap.max_mah; break; case POWER_SUPPLY_PROP_CHARGE_NOW: - if (di->flags.batt_unknown && !di->bat->chg_unknown_bat && + if (di->flags.batt_unknown && !di->bm->chg_unknown_bat && di->flags.batt_id_received) val->intval = di->bat_cap.max_mah; else val->intval = di->bat_cap.prev_mah; break; case POWER_SUPPLY_PROP_CAPACITY: - if (di->flags.batt_unknown && !di->bat->chg_unknown_bat && + if (di->flags.batt_unknown && !di->bm->chg_unknown_bat && di->flags.batt_id_received) val->intval = 100; else val->intval = di->bat_cap.prev_percent; break; case POWER_SUPPLY_PROP_CAPACITY_LEVEL: - if (di->flags.batt_unknown && !di->bat->chg_unknown_bat && + if (di->flags.batt_unknown && !di->bm->chg_unknown_bat && di->flags.batt_id_received) val->intval = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; else @@ -2078,7 +2078,7 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data) if (!di->flags.batt_id_received) { const struct abx500_battery_type *b; - b = &(di->bat->bat_type[di->bat->batt_id]); + b = &(di->bm->bat_type[di->bm->batt_id]); di->flags.batt_id_received = true; @@ -2155,7 +2155,7 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di) AB8500_SYS_CTRL2_BLOCK, AB8500_LOW_BAT_REG, ab8500_volt_to_regval( - di->bat->fg_params->lowbat_threshold) << 1 | + di->bm->fg_params->lowbat_threshold) << 1 | LOW_BAT_ENABLE); if (ret) { dev_err(di->dev, "%s write failed\n", __func__); @@ -2457,10 +2457,10 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__); return -ENOMEM; } - di->bat = pdev->mfd_cell->platform_data; - if (!di->bat) { + di->bm = pdev->mfd_cell->platform_data; + if (!di->bm) { if (np) { - ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); @@ -2491,11 +2491,11 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) di->fg_psy.external_power_changed = ab8500_fg_external_power_changed; di->bat_cap.max_mah_design = MILLI_TO_MICRO * - di->bat->bat_type[di->bat->batt_id].charge_full_design; + di->bm->bat_type[di->bm->batt_id].charge_full_design; di->bat_cap.max_mah = di->bat_cap.max_mah_design; - di->vbat_nom = di->bat->bat_type[di->bat->batt_id].nominal_voltage; + di->vbat_nom = di->bm->bat_type[di->bm->batt_id].nominal_voltage; di->init_capacity = true; @@ -2549,7 +2549,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) goto free_inst_curr_wq; } - di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer); + di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer); ab8500_fg_coulomb_counter(di, true); /* Initialize completion used to notify completion of inst current */ diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index dcdc4393b9e7..ea2e2eb652ef 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -207,7 +207,7 @@ enum maxim_ret { * @chg_info: information about connected charger types * @batt_data: data of the battery * @susp_status: current charger suspension status - * @bat: pointer to the abx500_bm platform data + * @bm: Platform specific battery management information * @chargalg_psy: structure that holds the battery properties exposed by * the charging algorithm * @events: structure for information about events triggered @@ -232,7 +232,7 @@ struct abx500_chargalg { struct abx500_chargalg_charger_info chg_info; struct abx500_chargalg_battery_data batt_data; struct abx500_chargalg_suspension_status susp_status; - struct abx500_bm_data *bat; + struct abx500_bm_data *bm; struct power_supply chargalg_psy; struct ux500_charger *ac_chg; struct ux500_charger *usb_chg; @@ -367,13 +367,13 @@ static void abx500_chargalg_start_safety_timer(struct abx500_chargalg *di) case AC_CHG: timer_expiration = round_jiffies(jiffies + - (di->bat->main_safety_tmr_h * 3600 * HZ)); + (di->bm->main_safety_tmr_h * 3600 * HZ)); break; case USB_CHG: timer_expiration = round_jiffies(jiffies + - (di->bat->usb_safety_tmr_h * 3600 * HZ)); + (di->bm->usb_safety_tmr_h * 3600 * HZ)); break; default: @@ -638,32 +638,32 @@ static void abx500_chargalg_start_charging(struct abx500_chargalg *di, */ static void abx500_chargalg_check_temp(struct abx500_chargalg *di) { - if (di->batt_data.temp > (di->bat->temp_low + di->t_hyst_norm) && - di->batt_data.temp < (di->bat->temp_high - di->t_hyst_norm)) { + if (di->batt_data.temp > (di->bm->temp_low + di->t_hyst_norm) && + di->batt_data.temp < (di->bm->temp_high - di->t_hyst_norm)) { /* Temp OK! */ di->events.btemp_underover = false; di->events.btemp_lowhigh = false; di->t_hyst_norm = 0; di->t_hyst_lowhigh = 0; } else { - if (((di->batt_data.temp >= di->bat->temp_high) && + if (((di->batt_data.temp >= di->bm->temp_high) && (di->batt_data.temp < - (di->bat->temp_over - di->t_hyst_lowhigh))) || + (di->bm->temp_over - di->t_hyst_lowhigh))) || ((di->batt_data.temp > - (di->bat->temp_under + di->t_hyst_lowhigh)) && - (di->batt_data.temp <= di->bat->temp_low))) { + (di->bm->temp_under + di->t_hyst_lowhigh)) && + (di->batt_data.temp <= di->bm->temp_low))) { /* TEMP minor!!!!! */ di->events.btemp_underover = false; di->events.btemp_lowhigh = true; - di->t_hyst_norm = di->bat->temp_hysteresis; + di->t_hyst_norm = di->bm->temp_hysteresis; di->t_hyst_lowhigh = 0; - } else if (di->batt_data.temp <= di->bat->temp_under || - di->batt_data.temp >= di->bat->temp_over) { + } else if (di->batt_data.temp <= di->bm->temp_under || + di->batt_data.temp >= di->bm->temp_over) { /* TEMP major!!!!! */ di->events.btemp_underover = true; di->events.btemp_lowhigh = false; di->t_hyst_norm = 0; - di->t_hyst_lowhigh = di->bat->temp_hysteresis; + di->t_hyst_lowhigh = di->bm->temp_hysteresis; } else { /* Within hysteresis */ dev_dbg(di->dev, "Within hysteresis limit temp: %d " @@ -682,12 +682,12 @@ static void abx500_chargalg_check_temp(struct abx500_chargalg *di) */ static void abx500_chargalg_check_charger_voltage(struct abx500_chargalg *di) { - if (di->chg_info.usb_volt > di->bat->chg_params->usb_volt_max) + if (di->chg_info.usb_volt > di->bm->chg_params->usb_volt_max) di->chg_info.usb_chg_ok = false; else di->chg_info.usb_chg_ok = true; - if (di->chg_info.ac_volt > di->bat->chg_params->ac_volt_max) + if (di->chg_info.ac_volt > di->bm->chg_params->ac_volt_max) di->chg_info.ac_chg_ok = false; else di->chg_info.ac_chg_ok = true; @@ -707,10 +707,10 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di) if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING && di->charge_state == STATE_NORMAL && !di->maintenance_chg && (di->batt_data.volt >= - di->bat->bat_type[di->bat->batt_id].termination_vol || + di->bm->bat_type[di->bm->batt_id].termination_vol || di->events.usb_cv_active || di->events.ac_cv_active) && di->batt_data.avg_curr < - di->bat->bat_type[di->bat->batt_id].termination_curr && + di->bm->bat_type[di->bm->batt_id].termination_curr && di->batt_data.avg_curr > 0) { if (++di->eoc_cnt >= EOC_COND_CNT) { di->eoc_cnt = 0; @@ -733,12 +733,12 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di) static void init_maxim_chg_curr(struct abx500_chargalg *di) { di->ccm.original_iset = - di->bat->bat_type[di->bat->batt_id].normal_cur_lvl; + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl; di->ccm.current_iset = - di->bat->bat_type[di->bat->batt_id].normal_cur_lvl; - di->ccm.test_delta_i = di->bat->maxi->charger_curr_step; - di->ccm.max_current = di->bat->maxi->chg_curr; - di->ccm.condition_cnt = di->bat->maxi->wait_cycles; + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl; + di->ccm.test_delta_i = di->bm->maxi->charger_curr_step; + di->ccm.max_current = di->bm->maxi->chg_curr; + di->ccm.condition_cnt = di->bm->maxi->wait_cycles; di->ccm.level = 0; } @@ -755,7 +755,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di) { int delta_i; - if (!di->bat->maxi->ena_maxi) + if (!di->bm->maxi->ena_maxi) return MAXIM_RET_NOACTION; delta_i = di->ccm.original_iset - di->batt_data.inst_curr; @@ -766,7 +766,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di) if (di->ccm.wait_cnt == 0) { dev_dbg(di->dev, "lowering current\n"); di->ccm.wait_cnt++; - di->ccm.condition_cnt = di->bat->maxi->wait_cycles; + di->ccm.condition_cnt = di->bm->maxi->wait_cycles; di->ccm.max_current = di->ccm.current_iset - di->ccm.test_delta_i; di->ccm.current_iset = di->ccm.max_current; @@ -791,7 +791,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di) if (di->ccm.current_iset == di->ccm.original_iset) return MAXIM_RET_NOACTION; - di->ccm.condition_cnt = di->bat->maxi->wait_cycles; + di->ccm.condition_cnt = di->bm->maxi->wait_cycles; di->ccm.current_iset = di->ccm.original_iset; di->ccm.level = 0; @@ -803,7 +803,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di) di->ccm.max_current) { if (di->ccm.condition_cnt-- == 0) { /* Increse the iset with cco.test_delta_i */ - di->ccm.condition_cnt = di->bat->maxi->wait_cycles; + di->ccm.condition_cnt = di->bm->maxi->wait_cycles; di->ccm.current_iset += di->ccm.test_delta_i; di->ccm.level++; dev_dbg(di->dev, " Maximization needed, increase" @@ -818,7 +818,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di) return MAXIM_RET_NOACTION; } } else { - di->ccm.condition_cnt = di->bat->maxi->wait_cycles; + di->ccm.condition_cnt = di->bm->maxi->wait_cycles; return MAXIM_RET_NOACTION; } } @@ -838,7 +838,7 @@ static void handle_maxim_chg_curr(struct abx500_chargalg *di) break; case MAXIM_RET_IBAT_TOO_HIGH: result = abx500_chargalg_update_chg_curr(di, - di->bat->bat_type[di->bat->batt_id].normal_cur_lvl); + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl); if (result) dev_err(di->dev, "failed to set chg curr\n"); break; @@ -1210,7 +1210,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) * this way */ if (!charger_status || - (di->events.batt_unknown && !di->bat->chg_unknown_bat)) { + (di->events.batt_unknown && !di->bm->chg_unknown_bat)) { if (di->charge_state != STATE_HANDHELD) { di->events.safety_timer_expired = false; abx500_chargalg_state_to(di, STATE_HANDHELD_INIT); @@ -1394,8 +1394,8 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) case STATE_NORMAL_INIT: abx500_chargalg_start_charging(di, - di->bat->bat_type[di->bat->batt_id].normal_vol_lvl, - di->bat->bat_type[di->bat->batt_id].normal_cur_lvl); + di->bm->bat_type[di->bm->batt_id].normal_vol_lvl, + di->bm->bat_type[di->bm->batt_id].normal_cur_lvl); abx500_chargalg_state_to(di, STATE_NORMAL); abx500_chargalg_start_safety_timer(di); abx500_chargalg_stop_maintenance_timer(di); @@ -1411,7 +1411,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) handle_maxim_chg_curr(di); if (di->charge_status == POWER_SUPPLY_STATUS_FULL && di->maintenance_chg) { - if (di->bat->no_maintenance) + if (di->bm->no_maintenance) abx500_chargalg_state_to(di, STATE_WAIT_FOR_RECHARGE_INIT); else @@ -1429,7 +1429,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) case STATE_WAIT_FOR_RECHARGE: if (di->batt_data.volt <= - di->bat->bat_type[di->bat->batt_id].recharge_vol) { + di->bm->bat_type[di->bm->batt_id].recharge_vol) { if (di->rch_cnt-- == 0) abx500_chargalg_state_to(di, STATE_NORMAL_INIT); } else @@ -1439,13 +1439,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) case STATE_MAINTENANCE_A_INIT: abx500_chargalg_stop_safety_timer(di); abx500_chargalg_start_maintenance_timer(di, - di->bat->bat_type[ - di->bat->batt_id].maint_a_chg_timer_h); + di->bm->bat_type[ + di->bm->batt_id].maint_a_chg_timer_h); abx500_chargalg_start_charging(di, - di->bat->bat_type[ - di->bat->batt_id].maint_a_vol_lvl, - di->bat->bat_type[ - di->bat->batt_id].maint_a_cur_lvl); + di->bm->bat_type[ + di->bm->batt_id].maint_a_vol_lvl, + di->bm->bat_type[ + di->bm->batt_id].maint_a_cur_lvl); abx500_chargalg_state_to(di, STATE_MAINTENANCE_A); power_supply_changed(&di->chargalg_psy); /* Intentional fallthrough*/ @@ -1459,13 +1459,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) case STATE_MAINTENANCE_B_INIT: abx500_chargalg_start_maintenance_timer(di, - di->bat->bat_type[ - di->bat->batt_id].maint_b_chg_timer_h); + di->bm->bat_type[ + di->bm->batt_id].maint_b_chg_timer_h); abx500_chargalg_start_charging(di, - di->bat->bat_type[ - di->bat->batt_id].maint_b_vol_lvl, - di->bat->bat_type[ - di->bat->batt_id].maint_b_cur_lvl); + di->bm->bat_type[ + di->bm->batt_id].maint_b_vol_lvl, + di->bm->bat_type[ + di->bm->batt_id].maint_b_cur_lvl); abx500_chargalg_state_to(di, STATE_MAINTENANCE_B); power_supply_changed(&di->chargalg_psy); /* Intentional fallthrough*/ @@ -1479,10 +1479,10 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di) case STATE_TEMP_LOWHIGH_INIT: abx500_chargalg_start_charging(di, - di->bat->bat_type[ - di->bat->batt_id].low_high_vol_lvl, - di->bat->bat_type[ - di->bat->batt_id].low_high_cur_lvl); + di->bm->bat_type[ + di->bm->batt_id].low_high_vol_lvl, + di->bm->bat_type[ + di->bm->batt_id].low_high_cur_lvl); abx500_chargalg_stop_maintenance_timer(di); di->charge_status = POWER_SUPPLY_STATUS_CHARGING; abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH); @@ -1543,11 +1543,11 @@ static void abx500_chargalg_periodic_work(struct work_struct *work) if (di->chg_info.conn_chg) queue_delayed_work(di->chargalg_wq, &di->chargalg_periodic_work, - di->bat->interval_charging * HZ); + di->bm->interval_charging * HZ); else queue_delayed_work(di->chargalg_wq, &di->chargalg_periodic_work, - di->bat->interval_not_charging * HZ); + di->bm->interval_not_charging * HZ); } /** @@ -1614,7 +1614,7 @@ static int abx500_chargalg_get_property(struct power_supply *psy, if (di->events.batt_ovv) { val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE; } else if (di->events.btemp_underover) { - if (di->batt_data.temp <= di->bat->temp_under) + if (di->batt_data.temp <= di->bm->temp_under) val->intval = POWER_SUPPLY_HEALTH_COLD; else val->intval = POWER_SUPPLY_HEALTH_OVERHEAT; @@ -1814,10 +1814,10 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__); return -ENOMEM; } - di->bat = pdev->mfd_cell->platform_data; - if (!di->bat) { + di->bm = pdev->mfd_cell->platform_data; + if (!di->bm) { if (np) { - ret = bmdevs_of_probe(&pdev->dev, np, &di->bat); + ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 2138bd33021a..6ce749a0e9d4 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -279,9 +279,9 @@ enum { NTC_INTERNAL, }; -int bmdevs_of_probe(struct device *dev, - struct device_node *np, - struct abx500_bm_data **battery); +int ab8500_bm_of_probe(struct device *dev, + struct device_node *np, + struct abx500_bm_data **battery); int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg, u8 value); -- cgit v1.2.3 From 23a04f9f40f2b32ee593b768483105b1c776814d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 29 Nov 2012 15:08:41 +0000 Subject: ab8500_bm: Always send platform specific battery information via pdata Currently the AB8500 battery management subsystem receives platform specific information via two different means depending on how the platform is booted. If DT is not enabled, a reference to a *_bm_data data structure containing each platform specific attribute is passed though platform_data. However, if DT is enabled, then platform_data is empty and the reference is gained though a DT specific probe function. There are two issues here 1) the same reference is being collected each time and 2) the DT way doesn't allow any provisions to select different platform specific attributes, which kind of defeats the object. Cc: Samuel Ortiz Signed-off-by: Lee Jones --- drivers/mfd/ab8500-core.c | 8 -------- drivers/power/ab8500_bmdata.c | 33 ++++++++++++++------------------- drivers/power/ab8500_btemp.c | 2 +- drivers/power/ab8500_charger.c | 2 +- drivers/power/ab8500_fg.c | 2 +- drivers/power/abx500_chargalg.c | 2 +- include/linux/mfd/abx500.h | 2 +- 7 files changed, 19 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 5ec70f26b9d5..bbd49d796902 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -1044,40 +1044,32 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = { .of_compatible = "stericsson,ab8500-charger", .num_resources = ARRAY_SIZE(ab8500_charger_resources), .resources = ab8500_charger_resources, -#ifndef CONFIG_OF .platform_data = &ab8500_bm_data, .pdata_size = sizeof(ab8500_bm_data), -#endif }, { .name = "ab8500-btemp", .of_compatible = "stericsson,ab8500-btemp", .num_resources = ARRAY_SIZE(ab8500_btemp_resources), .resources = ab8500_btemp_resources, -#ifndef CONFIG_OF .platform_data = &ab8500_bm_data, .pdata_size = sizeof(ab8500_bm_data), -#endif }, { .name = "ab8500-fg", .of_compatible = "stericsson,ab8500-fg", .num_resources = ARRAY_SIZE(ab8500_fg_resources), .resources = ab8500_fg_resources, -#ifndef CONFIG_OF .platform_data = &ab8500_bm_data, .pdata_size = sizeof(ab8500_bm_data), -#endif }, { .name = "ab8500-chargalg", .of_compatible = "stericsson,ab8500-chargalg", .num_resources = ARRAY_SIZE(ab8500_chargalg_resources), .resources = ab8500_chargalg_resources, -#ifndef CONFIG_OF .platform_data = &ab8500_bm_data, .pdata_size = sizeof(ab8500_bm_data), -#endif }, }; diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c index c2fb2c554019..6b772e5f515f 100644 --- a/drivers/power/ab8500_bmdata.c +++ b/drivers/power/ab8500_bmdata.c @@ -454,16 +454,13 @@ struct abx500_bm_data ab8500_bm_data = { int __devinit ab8500_bm_of_probe(struct device *dev, struct device_node *np, - struct abx500_bm_data **battery) + struct abx500_bm_data *bm) { struct batres_vs_temp *tmp_batres_tbl; struct device_node *np_bat_supply; - struct abx500_bm_data *bat; const char *btech; int i; - *battery = &ab8500_bm_data; - /* get phandle to 'battery-info' node */ np_bat_supply = of_parse_phandle(np, "battery", 0); if (!np_bat_supply) { @@ -477,16 +474,14 @@ int __devinit ab8500_bm_of_probe(struct device *dev, return -EINVAL; } - bat = *battery; - if (strncmp(btech, "LION", 4) == 0) { - bat->no_maintenance = true; - bat->chg_unknown_bat = true; - bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600; - bat->bat_type[BATTERY_UNKNOWN].termination_vol = 4150; - bat->bat_type[BATTERY_UNKNOWN].recharge_vol = 4130; - bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl = 520; - bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl = 4200; + bm->no_maintenance = true; + bm->chg_unknown_bat = true; + bm->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600; + bm->bat_type[BATTERY_UNKNOWN].termination_vol = 4150; + bm->bat_type[BATTERY_UNKNOWN].recharge_vol = 4130; + bm->bat_type[BATTERY_UNKNOWN].normal_cur_lvl = 520; + bm->bat_type[BATTERY_UNKNOWN].normal_vol_lvl = 4200; } if (of_property_read_bool(np_bat_supply, "thermistor-on-batctrl")) { @@ -495,15 +490,15 @@ int __devinit ab8500_bm_of_probe(struct device *dev, else tmp_batres_tbl = temp_to_batres_tbl_thermistor; } else { - bat->n_btypes = 4; - bat->bat_type = bat_type_ext_thermistor; - bat->adc_therm = ABx500_ADC_THERM_BATTEMP; - tmp_batres_tbl = temp_to_batres_tbl_ext_thermistor; + bm->n_btypes = 4; + bm->bat_type = bat_type_ext_thermistor; + bm->adc_therm = ABx500_ADC_THERM_BATTEMP; + tmp_batres_tbl = temp_to_batres_tbl_ext_thermistor; } /* select the battery resolution table */ - for (i = 0; i < bat->n_btypes; ++i) - bat->bat_type[i]->batres_tbl = tmp_batres_tbl; + for (i = 0; i < bm->n_btypes; ++i) + bm->bat_type[i].batres_tbl = tmp_batres_tbl; of_node_put(np_bat_supply); diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c index 33ed0fccbd0e..c0f7dcceae45 100644 --- a/drivers/power/ab8500_btemp.c +++ b/drivers/power/ab8500_btemp.c @@ -988,7 +988,7 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev) di->bm = pdev->mfd_cell->platform_data; if (!di->bm) { if (np) { - ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); + ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c index 21dc8422778d..c1077df26783 100644 --- a/drivers/power/ab8500_charger.c +++ b/drivers/power/ab8500_charger.c @@ -2647,7 +2647,7 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev) di->bm = pdev->mfd_cell->platform_data; if (!di->bm) { if (np) { - ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); + ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 4cf231375de3..8bb9df92627d 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -2460,7 +2460,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev) di->bm = pdev->mfd_cell->platform_data; if (!di->bm) { if (np) { - ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); + ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c index ea2e2eb652ef..22a511c2a719 100644 --- a/drivers/power/abx500_chargalg.c +++ b/drivers/power/abx500_chargalg.c @@ -1817,7 +1817,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev) di->bm = pdev->mfd_cell->platform_data; if (!di->bm) { if (np) { - ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm); + ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm); if (ret) { dev_err(&pdev->dev, "failed to get battery information\n"); diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 6ce749a0e9d4..4906b1842d2f 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -281,7 +281,7 @@ enum { int ab8500_bm_of_probe(struct device *dev, struct device_node *np, - struct abx500_bm_data **battery); + struct abx500_bm_data *bm); int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg, u8 value); -- cgit v1.2.3 From 3c58346525d82625e68e24f071804c2dc057b6f4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 28 Nov 2012 16:23:01 +0000 Subject: slab: Simplify bootstrap The nodelists field in kmem_cache is pointing to the first unused object in the array field when bootstrap is complete. A problem with the current approach is that the statically sized kmem_cache structure use on boot can only contain NR_CPUS entries. If the number of nodes plus the number of cpus is greater then we would overwrite memory following the kmem_cache_boot definition. Increase the size of the array field to ensure that also the node pointers fit into the array field. Once we do that we no longer need the kmem_cache_nodelists array and we can then also use that structure elsewhere. Acked-by: Glauber Costa Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 6 +++++- mm/slab.c | 21 +++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cc290f0bdb34..45c0356fdc8c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -89,9 +89,13 @@ struct kmem_cache { * (see kmem_cache_init()) * We still use [NR_CPUS] and not [1] or [0] because cache_cache * is statically defined, so we reserve the max number of cpus. + * + * We also need to guarantee that the list is able to accomodate a + * pointer for each node since "nodelists" uses the remainder of + * available pointers. */ struct kmem_list3 **nodelists; - struct array_cache *array[NR_CPUS]; + struct array_cache *array[NR_CPUS + MAX_NUMNODES]; /* * Do not add fields after array[] */ diff --git a/mm/slab.c b/mm/slab.c index e26bff5ed1a6..c7ea5234c4e9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -553,9 +553,7 @@ static struct arraycache_init initarray_generic = { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ -static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES]; static struct kmem_cache kmem_cache_boot = { - .nodelists = kmem_cache_nodelists, .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, @@ -1559,6 +1557,15 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) } } +/* + * The memory after the last cpu cache pointer is used for the + * the nodelists pointer. + */ +static void setup_nodelists_pointer(struct kmem_cache *cachep) +{ + cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; +} + /* * Initialisation. Called after the page allocator have been initialised and * before smp_init(). @@ -1573,15 +1580,14 @@ void __init kmem_cache_init(void) int node; kmem_cache = &kmem_cache_boot; + setup_nodelists_pointer(kmem_cache); if (num_possible_nodes() == 1) use_alien_caches = 0; - for (i = 0; i < NUM_INIT_LISTS; i++) { + for (i = 0; i < NUM_INIT_LISTS; i++) kmem_list3_init(&initkmem_list3[i]); - if (i < MAX_NUMNODES) - kmem_cache->nodelists[i] = NULL; - } + set_up_list3s(kmem_cache, CACHE_CACHE); /* @@ -1619,7 +1625,6 @@ void __init kmem_cache_init(void) list_add(&kmem_cache->list, &slab_caches); kmem_cache->colour_off = cache_line_size(); kmem_cache->array[smp_processor_id()] = &initarray_cache.cache; - kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; /* * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids @@ -2422,7 +2427,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) else gfp = GFP_NOWAIT; - cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; + setup_nodelists_pointer(cachep); #if DEBUG /* -- cgit v1.2.3 From 7da4d641c58d201c3cc1835c05ca1a7fa26f0856 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Nov 2012 03:14:23 +0100 Subject: mm: Count the number of pages affected in change_protection() This will be used for three kinds of purposes: - to optimize mprotect() - to speed up working set scanning for working set areas that have not been touched - to more accurately scan per real working set No change in functionality from this patch. Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/hugetlb.h | 8 +++++-- include/linux/mm.h | 3 +++ mm/hugetlb.c | 10 +++++++-- mm/mprotect.c | 58 +++++++++++++++++++++++++++++++++++++------------ 4 files changed, 61 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 225164842ab6..06e691baab86 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int write); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); -void hugetlb_change_protection(struct vm_area_struct *vma, +unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); #else /* !CONFIG_HUGETLB_PAGE */ @@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src) { } -#define hugetlb_change_protection(vma, address, end, newprot) +static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, + unsigned long address, unsigned long end, pgprot_t newprot) +{ + return 0; +} static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, diff --git a/include/linux/mm.h b/include/linux/mm.h index bcaab4e6fe91..1856c62d82cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1078,6 +1078,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); +extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgprot_t newprot, + int dirty_accountable); extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 59a0059b39e2..712895eda0d0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3014,7 +3014,7 @@ same_page: return i ? i : -EFAULT; } -void hugetlb_change_protection(struct vm_area_struct *vma, +unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) { struct mm_struct *mm = vma->vm_mm; @@ -3022,6 +3022,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, pte_t *ptep; pte_t pte; struct hstate *h = hstate_vma(vma); + unsigned long pages = 0; BUG_ON(address >= end); flush_cache_range(vma, address, end); @@ -3032,12 +3033,15 @@ void hugetlb_change_protection(struct vm_area_struct *vma, ptep = huge_pte_offset(mm, address); if (!ptep) continue; - if (huge_pmd_unshare(mm, &address, ptep)) + if (huge_pmd_unshare(mm, &address, ptep)) { + pages++; continue; + } if (!huge_pte_none(huge_ptep_get(ptep))) { pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(pte_modify(pte, newprot)); set_huge_pte_at(mm, address, ptep, pte); + pages++; } } spin_unlock(&mm->page_table_lock); @@ -3049,6 +3053,8 @@ void hugetlb_change_protection(struct vm_area_struct *vma, */ flush_tlb_range(vma, start, end); mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + + return pages << h->order; } int hugetlb_reserve_pages(struct inode *inode, diff --git a/mm/mprotect.c b/mm/mprotect.c index a40992610ab6..1e265be25f85 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -35,12 +35,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) } #endif -static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, +static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable) { pte_t *pte, oldpte; spinlock_t *ptl; + unsigned long pages = 0; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); @@ -60,6 +61,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, ptent = pte_mkwrite(ptent); ptep_modify_prot_commit(mm, addr, pte, ptent); + pages++; } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -72,18 +74,22 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, set_pte_at(mm, addr, pte, swp_entry_to_pte(entry)); } + pages++; } } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); + + return pages; } -static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud, +static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable) { pmd_t *pmd; unsigned long next; + unsigned long pages = 0; pmd = pmd_offset(pud, addr); do { @@ -91,35 +97,42 @@ static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud, if (pmd_trans_huge(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) split_huge_page_pmd(vma->vm_mm, pmd); - else if (change_huge_pmd(vma, pmd, addr, newprot)) + else if (change_huge_pmd(vma, pmd, addr, newprot)) { + pages += HPAGE_PMD_NR; continue; + } /* fall through */ } if (pmd_none_or_clear_bad(pmd)) continue; - change_pte_range(vma->vm_mm, pmd, addr, next, newprot, + pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot, dirty_accountable); } while (pmd++, addr = next, addr != end); + + return pages; } -static inline void change_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable) { pud_t *pud; unsigned long next; + unsigned long pages = 0; pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - change_pmd_range(vma, pud, addr, next, newprot, + pages += change_pmd_range(vma, pud, addr, next, newprot, dirty_accountable); } while (pud++, addr = next, addr != end); + + return pages; } -static void change_protection(struct vm_area_struct *vma, +static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable) { @@ -127,6 +140,7 @@ static void change_protection(struct vm_area_struct *vma, pgd_t *pgd; unsigned long next; unsigned long start = addr; + unsigned long pages = 0; BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); @@ -135,10 +149,30 @@ static void change_protection(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - change_pud_range(vma, pgd, addr, next, newprot, + pages += change_pud_range(vma, pgd, addr, next, newprot, dirty_accountable); } while (pgd++, addr = next, addr != end); + flush_tlb_range(vma, start, end); + + return pages; +} + +unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgprot_t newprot, + int dirty_accountable) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long pages; + + mmu_notifier_invalidate_range_start(mm, start, end); + if (is_vm_hugetlb_page(vma)) + pages = hugetlb_change_protection(vma, start, end, newprot); + else + pages = change_protection_range(vma, start, end, newprot, dirty_accountable); + mmu_notifier_invalidate_range_end(mm, start, end); + + return pages; } int @@ -213,12 +247,8 @@ success: dirty_accountable = 1; } - mmu_notifier_invalidate_range_start(mm, start, end); - if (is_vm_hugetlb_page(vma)) - hugetlb_change_protection(vma, start, end, vma->vm_page_prot); - else - change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); - mmu_notifier_invalidate_range_end(mm, start, end); + change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); perf_event_mmap(vma); -- cgit v1.2.3 From 5647bc293ab15f66a7b1cda850c5e9d162a6c7c2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 19 Oct 2012 10:46:20 +0100 Subject: mm: compaction: Move migration fail/success stats to migrate.c The compact_pages_moved and compact_pagemigrate_failed events are convenient for determining if compaction is active and to what degree migration is succeeding but it's at the wrong level. Other users of migration may also want to know if migration is working properly and this will be particularly true for any automated NUMA migration. This patch moves the counters down to migration with the new events called pgmigrate_success and pgmigrate_fail. The compact_blocks_moved counter is removed because while it was useful for debugging initially, it's worthless now as no meaningful conclusions can be drawn from its value. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/vm_event_item.h | 4 +++- mm/compaction.c | 4 ---- mm/migrate.c | 6 ++++++ mm/vmstat.c | 7 ++++--- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 3d3114594370..8aa7cb94b5fb 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -38,8 +38,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, KSWAPD_SKIP_CONGESTION_WAIT, PAGEOUTRUN, ALLOCSTALL, PGROTATED, +#ifdef CONFIG_MIGRATION + PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, +#endif #ifdef CONFIG_COMPACTION - COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, #endif #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/compaction.c b/mm/compaction.c index 9eef55838fca..00ad88395216 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -994,10 +994,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; - count_vm_event(COMPACTBLOCKS); - count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining); - if (nr_remaining) - count_vm_events(COMPACTPAGEFAILED, nr_remaining); trace_mm_compaction_migratepages(nr_migrate - nr_remaining, nr_remaining); diff --git a/mm/migrate.c b/mm/migrate.c index 77ed2d773705..04687f69cc17 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -962,6 +962,7 @@ int migrate_pages(struct list_head *from, { int retry = 1; int nr_failed = 0; + int nr_succeeded = 0; int pass = 0; struct page *page; struct page *page2; @@ -988,6 +989,7 @@ int migrate_pages(struct list_head *from, retry++; break; case 0: + nr_succeeded++; break; default: /* Permanent failure */ @@ -998,6 +1000,10 @@ int migrate_pages(struct list_head *from, } rc = 0; out: + if (nr_succeeded) + count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); + if (nr_failed) + count_vm_events(PGMIGRATE_FAIL, nr_failed); if (!swapwrite) current->flags &= ~PF_SWAPWRITE; diff --git a/mm/vmstat.c b/mm/vmstat.c index c7370579111b..89a7fd665b32 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -774,10 +774,11 @@ const char * const vmstat_text[] = { "pgrotated", +#ifdef CONFIG_MIGRATION + "pgmigrate_success", + "pgmigrate_fail", +#endif #ifdef CONFIG_COMPACTION - "compact_blocks_moved", - "compact_pages_moved", - "compact_pagemigrate_failed", "compact_stall", "compact_fail", "compact_success", -- cgit v1.2.3 From 7b2a2d4a18fffac3c4872021529b0657896db788 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 19 Oct 2012 14:07:31 +0100 Subject: mm: migrate: Add a tracepoint for migrate_pages The pgmigrate_success and pgmigrate_fail vmstat counters tells the user about migration activity but not the type or the reason. This patch adds a tracepoint to identify the type of page migration and why the page is being migrated. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/migrate.h | 13 +++++++++-- include/trace/events/migrate.h | 51 ++++++++++++++++++++++++++++++++++++++++++ mm/compaction.c | 3 ++- mm/memory-failure.c | 3 ++- mm/memory_hotplug.c | 3 ++- mm/mempolicy.c | 6 +++-- mm/migrate.c | 10 +++++++-- mm/page_alloc.c | 3 ++- 8 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 include/trace/events/migrate.h (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ce7e6671968b..9d1c159e2427 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,6 +7,15 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); +enum migrate_reason { + MR_COMPACTION, + MR_MEMORY_FAILURE, + MR_MEMORY_HOTPLUG, + MR_SYSCALL, /* also applies to cpusets */ + MR_MEMPOLICY_MBIND, + MR_CMA +}; + #ifdef CONFIG_MIGRATION extern void putback_lru_pages(struct list_head *l); @@ -14,7 +23,7 @@ extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - enum migrate_mode mode); + enum migrate_mode mode, int reason); extern int migrate_huge_page(struct page *, new_page_t x, unsigned long private, bool offlining, enum migrate_mode mode); @@ -35,7 +44,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, - enum migrate_mode mode) { return -ENOSYS; } + enum migrate_mode mode, int reason) { return -ENOSYS; } static inline int migrate_huge_page(struct page *page, new_page_t x, unsigned long private, bool offlining, enum migrate_mode mode) { return -ENOSYS; } diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h new file mode 100644 index 000000000000..ec2a6ccfd7e5 --- /dev/null +++ b/include/trace/events/migrate.h @@ -0,0 +1,51 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM migrate + +#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MIGRATE_H + +#define MIGRATE_MODE \ + {MIGRATE_ASYNC, "MIGRATE_ASYNC"}, \ + {MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT"}, \ + {MIGRATE_SYNC, "MIGRATE_SYNC"} + +#define MIGRATE_REASON \ + {MR_COMPACTION, "compaction"}, \ + {MR_MEMORY_FAILURE, "memory_failure"}, \ + {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \ + {MR_SYSCALL, "syscall_or_cpuset"}, \ + {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \ + {MR_CMA, "cma"} + +TRACE_EVENT(mm_migrate_pages, + + TP_PROTO(unsigned long succeeded, unsigned long failed, + enum migrate_mode mode, int reason), + + TP_ARGS(succeeded, failed, mode, reason), + + TP_STRUCT__entry( + __field( unsigned long, succeeded) + __field( unsigned long, failed) + __field( enum migrate_mode, mode) + __field( int, reason) + ), + + TP_fast_assign( + __entry->succeeded = succeeded; + __entry->failed = failed; + __entry->mode = mode; + __entry->reason = reason; + ), + + TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s", + __entry->succeeded, + __entry->failed, + __print_symbolic(__entry->mode, MIGRATE_MODE), + __print_symbolic(__entry->reason, MIGRATE_REASON)) +); + +#endif /* _TRACE_MIGRATE_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/compaction.c b/mm/compaction.c index 00ad88395216..2c077a78487c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -990,7 +990,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, (unsigned long)cc, false, - cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC, + MR_COMPACTION); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6c5899b9034a..ddb68a169e45 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1558,7 +1558,8 @@ int soft_offline_page(struct page *page, int flags) page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, - false, MIGRATE_SYNC); + false, MIGRATE_SYNC, + MR_MEMORY_FAILURE); if (ret) { putback_lru_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e4eeacae2b91..e598bd15c041 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -812,7 +812,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) * migrate_pages returns # of failed pages. */ ret = migrate_pages(&source, alloc_migrate_target, 0, - true, MIGRATE_SYNC); + true, MIGRATE_SYNC, + MR_MEMORY_HOTPLUG); if (ret) putback_lru_pages(&source); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d04a8a54c294..66e90ecc2350 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -961,7 +961,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_node_page, dest, - false, MIGRATE_SYNC); + false, MIGRATE_SYNC, + MR_SYSCALL); if (err) putback_lru_pages(&pagelist); } @@ -1202,7 +1203,8 @@ static long do_mbind(unsigned long start, unsigned long len, if (!list_empty(&pagelist)) { nr_failed = migrate_pages(&pagelist, new_vma_page, (unsigned long)vma, - false, MIGRATE_SYNC); + false, MIGRATE_SYNC, + MR_MEMPOLICY_MBIND); if (nr_failed) putback_lru_pages(&pagelist); } diff --git a/mm/migrate.c b/mm/migrate.c index 04687f69cc17..27be9c923dc1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -38,6 +38,9 @@ #include +#define CREATE_TRACE_POINTS +#include + #include "internal.h" /* @@ -958,7 +961,7 @@ out: */ int migrate_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, - enum migrate_mode mode) + enum migrate_mode mode, int reason) { int retry = 1; int nr_failed = 0; @@ -1004,6 +1007,8 @@ out: count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); if (nr_failed) count_vm_events(PGMIGRATE_FAIL, nr_failed); + trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason); + if (!swapwrite) current->flags &= ~PF_SWAPWRITE; @@ -1145,7 +1150,8 @@ set_status: err = 0; if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_page_node, - (unsigned long)pm, 0, MIGRATE_SYNC); + (unsigned long)pm, 0, MIGRATE_SYNC, + MR_SYSCALL); if (err) putback_lru_pages(&pagelist); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7bb35ac0964a..5953dc2d196f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5707,7 +5707,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, ret = migrate_pages(&cc->migratepages, alloc_migrate_target, - 0, false, MIGRATE_SYNC); + 0, false, MIGRATE_SYNC, + MR_CMA); } putback_lru_pages(&cc->migratepages); -- cgit v1.2.3 From 397487db696cae0b026a474a5cd66f4e372995e6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 19 Oct 2012 12:00:10 +0100 Subject: mm: compaction: Add scanned and isolated counters for compaction Compaction already has tracepoints to count scanned and isolated pages but it requires that ftrace be enabled and if that information has to be written to disk then it can be disruptive. This patch adds vmstat counters for compaction called compact_migrate_scanned, compact_free_scanned and compact_isolated. With these counters, it is possible to define a basic cost model for compaction. This approximates of how much work compaction is doing and can be compared that with an oprofile showing TLB misses and see if the cost of compaction is being offset by THP for example. Minimally a compaction patch can be evaluated in terms of whether it increases or decreases cost. The basic cost model looks like this Fundamental unit u: a word sizeof(void *) Ca = cost of struct page access = sizeof(struct page) / u Cmc = Cost migrate page copy = (Ca + PAGE_SIZE/u) * 2 Cmf = Cost migrate failure = Ca * 2 Ci = Cost page isolation = (Ca + Wi) where Wi is a constant that should reflect the approximate cost of the locking operation. Csm = Cost migrate scanning = Ca Csf = Cost free scanning = Ca Overall cost = (Csm * compact_migrate_scanned) + (Csf * compact_free_scanned) + (Ci * compact_isolated) + (Cmc * pgmigrate_success) + (Cmf * pgmigrate_failed) Where the values are read from /proc/vmstat. This is very basic and ignores certain costs such as the allocation cost to do a migrate page copy but any improvement to the model would still use the same vmstat counters. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/vm_event_item.h | 2 ++ mm/compaction.c | 8 ++++++++ mm/vmstat.c | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 8aa7cb94b5fb..a1f750b8e72a 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -42,6 +42,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, #endif #ifdef CONFIG_COMPACTION + COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, + COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, #endif #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/compaction.c b/mm/compaction.c index 2c077a78487c..aee7443a4d5a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -356,6 +356,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (blockpfn == end_pfn) update_pageblock_skip(cc, valid_page, total_isolated, false); + count_vm_events(COMPACTFREE_SCANNED, nr_scanned); + if (total_isolated) + count_vm_events(COMPACTISOLATED, total_isolated); + return total_isolated; } @@ -646,6 +650,10 @@ next_pageblock: trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); + count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned); + if (nr_isolated) + count_vm_events(COMPACTISOLATED, nr_isolated); + return low_pfn; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 89a7fd665b32..3a067fabe190 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -779,6 +779,9 @@ const char * const vmstat_text[] = { "pgmigrate_fail", #endif #ifdef CONFIG_COMPACTION + "compact_migrate_scanned", + "compact_free_scanned", + "compact_isolated", "compact_stall", "compact_fail", "compact_success", -- cgit v1.2.3 From 0b9d705297b273657923518dbea2377cd03532ed Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 5 Oct 2012 21:36:27 +0200 Subject: mm: numa: Support NUMA hinting page faults from gup/gup_fast Introduce FOLL_NUMA to tell follow_page to check pte/pmd_numa. get_user_pages must use FOLL_NUMA, and it's safe to do so because it always invokes handle_mm_fault and retries the follow_page later. KVM secondary MMU page faults will trigger the NUMA hinting page faults through gup_fast -> get_user_pages -> follow_page -> handle_mm_fault. Other follow_page callers like KSM should not use FOLL_NUMA, or they would fail to get the pages if they use follow_page instead of get_user_pages. [ This patch was picked up from the AutoNUMA tree. ] Originally-by: Andrea Arcangeli Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Rik van Riel [ ported to this tree. ] Signed-off-by: Ingo Molnar Reviewed-by: Rik van Riel --- include/linux/mm.h | 1 + mm/memory.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1856c62d82cd..fa1615211159 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1572,6 +1572,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_MLOCK 0x40 /* mark page as mlocked */ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ +#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/memory.c b/mm/memory.c index 7cf762857baa..cd8e0daf1912 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1517,6 +1517,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); goto out; } + if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) + goto no_page_table; if (pmd_trans_huge(*pmd)) { if (flags & FOLL_SPLIT) { split_huge_page_pmd(mm, pmd); @@ -1546,6 +1548,8 @@ split_fallthrough: pte = *ptep; if (!pte_present(pte)) goto no_page; + if ((flags & FOLL_NUMA) && pte_numa(pte)) + goto no_page; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; @@ -1697,6 +1701,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= (gup_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + + /* + * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault + * would be called on PROT_NONE ranges. We must never invoke + * handle_mm_fault on PROT_NONE ranges or the NUMA hinting + * page faults would unprotect the PROT_NONE ranges if + * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd + * bitflag. So to avoid that, don't set FOLL_NUMA if + * FOLL_FORCE is set. + */ + if (!(gup_flags & FOLL_FORCE)) + gup_flags |= FOLL_NUMA; + i = 0; do { -- cgit v1.2.3 From d10e63f29488b0f312a443f9507ea9b6fd3c9090 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Oct 2012 14:16:31 +0200 Subject: mm: numa: Create basic numa page hinting infrastructure Note: This patch started as "mm/mpol: Create special PROT_NONE infrastructure" and preserves the basic idea but steals *very* heavily from "autonuma: numa hinting page faults entry points" for the actual fault handlers without the migration parts. The end result is barely recognisable as either patch so all Signed-off and Reviewed-bys are dropped. If Peter, Ingo and Andrea are ok with this version, I will re-add the signed-offs-by to reflect the history. In order to facilitate a lazy -- fault driven -- migration of pages, create a special transient PAGE_NUMA variant, we can then use the 'spurious' protection faults to drive our migrations from. The meaning of PAGE_NUMA depends on the architecture but on x86 it is effectively PROT_NONE. Actual PROT_NONE mappings will not generate these NUMA faults for the reason that the page fault code checks the permission on the VMA (and will throw a segmentation fault on actual PROT_NONE mappings), before it ever calls handle_mm_fault. [dhillf@gmail.com: Fix typo] Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/huge_mm.h | 10 +++++ mm/huge_memory.c | 22 ++++++++++ mm/memory.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 141 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b31cb7da0346..a1d26a98c655 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -159,6 +159,10 @@ static inline struct page *compound_trans_head(struct page *page) } return page; } + +extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, + pmd_t pmd, pmd_t *pmdp); + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -195,6 +199,12 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, { return 0; } + +static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, + pmd_t pmd, pmd_t *pmdp) +{ +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cd24aa562144..f5f37630c54d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1018,6 +1018,28 @@ out: return page; } +/* NUMA hinting page fault entry point for trans huge pmds */ +int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, + pmd_t pmd, pmd_t *pmdp) +{ + struct page *page; + unsigned long haddr = addr & HPAGE_PMD_MASK; + + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) + goto out_unlock; + + page = pmd_page(pmd); + pmd = pmd_mknonnuma(pmd); + set_pmd_at(mm, haddr, pmdp, pmd); + VM_BUG_ON(pmd_numa(*pmdp)); + update_mmu_cache_pmd(vma, addr, pmdp); + +out_unlock: + spin_unlock(&mm->page_table_lock); + return 0; +} + int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr) { diff --git a/mm/memory.c b/mm/memory.c index cd8e0daf1912..e30616f2cc3d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3448,6 +3448,103 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } +int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) +{ + struct page *page; + spinlock_t *ptl; + + /* + * The "pte" at this point cannot be used safely without + * validation through pte_unmap_same(). It's of NUMA type but + * the pfn may be screwed if the read is non atomic. + * + * ptep_modify_prot_start is not called as this is clearing + * the _PAGE_NUMA bit and it is not really expected that there + * would be concurrent hardware modifications to the PTE. + */ + ptl = pte_lockptr(mm, pmd); + spin_lock(ptl); + if (unlikely(!pte_same(*ptep, pte))) + goto out_unlock; + pte = pte_mknonnuma(pte); + set_pte_at(mm, addr, ptep, pte); + update_mmu_cache(vma, addr, ptep); + + page = vm_normal_page(vma, addr, pte); + if (!page) { + pte_unmap_unlock(ptep, ptl); + return 0; + } + +out_unlock: + pte_unmap_unlock(ptep, ptl); + return 0; +} + +/* NUMA hinting page fault entry point for regular pmds */ +#ifdef CONFIG_NUMA_BALANCING +static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd; + pte_t *pte, *orig_pte; + unsigned long _addr = addr & PMD_MASK; + unsigned long offset; + spinlock_t *ptl; + bool numa = false; + + spin_lock(&mm->page_table_lock); + pmd = *pmdp; + if (pmd_numa(pmd)) { + set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd)); + numa = true; + } + spin_unlock(&mm->page_table_lock); + + if (!numa) + return 0; + + /* we're in a page fault so some vma must be in the range */ + BUG_ON(!vma); + BUG_ON(vma->vm_start >= _addr + PMD_SIZE); + offset = max(_addr, vma->vm_start) & ~PMD_MASK; + VM_BUG_ON(offset >= PMD_SIZE); + orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl); + pte += offset >> PAGE_SHIFT; + for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { + pte_t pteval = *pte; + struct page *page; + if (!pte_present(pteval)) + continue; + if (!pte_numa(pteval)) + continue; + if (addr >= vma->vm_end) { + vma = find_vma(mm, addr); + /* there's a pte present so there must be a vma */ + BUG_ON(!vma); + BUG_ON(addr < vma->vm_start); + } + if (pte_numa(pteval)) { + pteval = pte_mknonnuma(pteval); + set_pte_at(mm, addr, pte, pteval); + } + page = vm_normal_page(vma, addr, pteval); + if (unlikely(!page)) + continue; + } + pte_unmap_unlock(orig_pte, ptl); + + return 0; +} +#else +static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + BUG(); +} +#endif /* CONFIG_NUMA_BALANCING */ + /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -3486,6 +3583,9 @@ int handle_pte_fault(struct mm_struct *mm, pte, pmd, flags, entry); } + if (pte_numa(entry)) + return do_numa_page(mm, vma, address, entry, pte, pmd); + ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (unlikely(!pte_same(*pte, entry))) @@ -3554,9 +3654,11 @@ retry: barrier(); if (pmd_trans_huge(orig_pmd)) { - if (flags & FAULT_FLAG_WRITE && - !pmd_write(orig_pmd) && - !pmd_trans_splitting(orig_pmd)) { + if (pmd_numa(*pmd)) + return do_huge_pmd_numa_page(mm, address, + orig_pmd, pmd); + + if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { ret = do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); /* @@ -3568,10 +3670,14 @@ retry: goto retry; return ret; } + return 0; } } + if (pmd_numa(*pmd)) + return do_pmd_numa_page(mm, vma, address, pmd); + /* * Use __pte_alloc instead of pte_alloc_map, because we can't * run pte_offset_map on the pmd, if an huge pmd could -- cgit v1.2.3 From 771fb4d806a92bf6c988fcfbd286ae40a9374332 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:30 +0200 Subject: mm: mempolicy: Check for misplaced page This patch provides a new function to test whether a page resides on a node that is appropriate for the mempolicy for the vma and address where the page is supposed to be mapped. This involves looking up the node where the page belongs. So, the function returns that node so that it may be used to allocated the page without consulting the policy again. A subsequent patch will call this function from the fault path. Because of this, I don't want to go ahead and allocate the page, e.g., via alloc_page_vma() only to have to free it if it has the correct policy. So, I just mimic the alloc_page_vma() node computation logic--sort of. Note: we could use this function to implement a MPOL_MF_STRICT behavior when migrating pages to match mbind() mempolicy--e.g., to ensure that pages in an interleaved range are reinterleaved rather than left where they are when they reside on any page in the interleave nodemask. Signed-off-by: Lee Schermerhorn Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds [ Added MPOL_F_LAZY to trigger migrate-on-fault; simplified code now that we don't have to bother with special crap for interleaved ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/linux/mempolicy.h | 8 +++++ include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 76 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e5ccb9ddd90e..c511e2523560 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -198,6 +198,8 @@ static inline int vma_migratable(struct vm_area_struct *vma) return 1; } +extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); + #else struct mempolicy {}; @@ -323,5 +325,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, return 0; } +static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, + unsigned long address) +{ + return -1; /* no node preference */ +} + #endif /* CONFIG_NUMA */ #endif diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index d23dca8367cc..472de8a5d37e 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -61,6 +61,7 @@ enum mpol_rebind_step { #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ +#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c21e91477c4f..df1466d3d2d8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2181,6 +2181,82 @@ static void sp_free(struct sp_node *n) kmem_cache_free(sn_cache, n); } +/** + * mpol_misplaced - check whether current page node is valid in policy + * + * @page - page to be checked + * @vma - vm area where page mapped + * @addr - virtual address where page mapped + * + * Lookup current policy node id for vma,addr and "compare to" page's + * node id. + * + * Returns: + * -1 - not misplaced, page is in the right node + * node - node id where the page should be + * + * Policy determination "mimics" alloc_page_vma(). + * Called from fault path where we know the vma and faulting address. + */ +int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol; + struct zone *zone; + int curnid = page_to_nid(page); + unsigned long pgoff; + int polnid = -1; + int ret = -1; + + BUG_ON(!vma); + + pol = get_vma_policy(current, vma, addr); + if (!(pol->flags & MPOL_F_MOF)) + goto out; + + switch (pol->mode) { + case MPOL_INTERLEAVE: + BUG_ON(addr >= vma->vm_end); + BUG_ON(addr < vma->vm_start); + + pgoff = vma->vm_pgoff; + pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + polnid = offset_il_node(pol, vma, pgoff); + break; + + case MPOL_PREFERRED: + if (pol->flags & MPOL_F_LOCAL) + polnid = numa_node_id(); + else + polnid = pol->v.preferred_node; + break; + + case MPOL_BIND: + /* + * allows binding to multiple nodes. + * use current page if in policy nodemask, + * else select nearest allowed node, if any. + * If no allowed nodes, use current [!misplaced]. + */ + if (node_isset(curnid, pol->v.nodes)) + goto out; + (void)first_zones_zonelist( + node_zonelist(numa_node_id(), GFP_HIGHUSER), + gfp_zone(GFP_HIGHUSER), + &pol->v.nodes, &zone); + polnid = zone->node; + break; + + default: + BUG(); + } + if (curnid != polnid) + ret = polnid; +out: + mpol_cond_put(pol); + + return ret; +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); -- cgit v1.2.3 From 7039e1dbec6eeaa8ecab43a82d6589eeced995c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:34 +0200 Subject: mm: migrate: Introduce migrate_misplaced_page() Note: This was originally based on Peter's patch "mm/migrate: Introduce migrate_misplaced_page()" but borrows extremely heavily from Andrea's "autonuma: memory follows CPU algorithm and task/mm_autonuma stats collection". The end result is barely recognisable so signed-offs had to be dropped. If original authors are ok with it, I'll re-add the signed-off-bys. Add migrate_misplaced_page() which deals with migrating pages from faults. Based-on-work-by: Lee Schermerhorn Based-on-work-by: Peter Zijlstra Based-on-work-by: Andrea Arcangeli Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/migrate.h | 11 +++++ mm/migrate.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9d1c159e2427..f0d0313eea6f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -13,6 +13,7 @@ enum migrate_reason { MR_MEMORY_HOTPLUG, MR_SYSCALL, /* also applies to cpusets */ MR_MEMPOLICY_MBIND, + MR_NUMA_MISPLACED, MR_CMA }; @@ -73,4 +74,14 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #define fail_migrate_page NULL #endif /* CONFIG_MIGRATION */ + +#ifdef CONFIG_NUMA_BALANCING +extern int migrate_misplaced_page(struct page *page, int node); +#else +static inline int migrate_misplaced_page(struct page *page, int node) +{ + return -EAGAIN; /* can't migrate now */ +} +#endif /* CONFIG_NUMA_BALANCING */ + #endif /* _LINUX_MIGRATE_H */ diff --git a/mm/migrate.c b/mm/migrate.c index 27be9c923dc1..d168aec98427 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -282,7 +282,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, struct buffer_head *head, enum migrate_mode mode) { - int expected_count; + int expected_count = 0; void **pslot; if (!mapping) { @@ -1415,4 +1415,108 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, } return err; } -#endif + +#ifdef CONFIG_NUMA_BALANCING +/* + * Returns true if this is a safe migration target node for misplaced NUMA + * pages. Currently it only checks the watermarks which crude + */ +static bool migrate_balanced_pgdat(struct pglist_data *pgdat, + int nr_migrate_pages) +{ + int z; + for (z = pgdat->nr_zones - 1; z >= 0; z--) { + struct zone *zone = pgdat->node_zones + z; + + if (!populated_zone(zone)) + continue; + + if (zone->all_unreclaimable) + continue; + + /* Avoid waking kswapd by allocating pages_to_migrate pages. */ + if (!zone_watermark_ok(zone, 0, + high_wmark_pages(zone) + + nr_migrate_pages, + 0, 0)) + continue; + return true; + } + return false; +} + +static struct page *alloc_misplaced_dst_page(struct page *page, + unsigned long data, + int **result) +{ + int nid = (int) data; + struct page *newpage; + + newpage = alloc_pages_exact_node(nid, + (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | + __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN) & + ~GFP_IOFS, 0); + return newpage; +} + +/* + * Attempt to migrate a misplaced page to the specified destination + * node. Caller is expected to have an elevated reference count on + * the page that will be dropped by this function before returning. + */ +int migrate_misplaced_page(struct page *page, int node) +{ + int isolated = 0; + LIST_HEAD(migratepages); + + /* + * Don't migrate pages that are mapped in multiple processes. + * TODO: Handle false sharing detection instead of this hammer + */ + if (page_mapcount(page) != 1) { + put_page(page); + goto out; + } + + /* Avoid migrating to a node that is nearly full */ + if (migrate_balanced_pgdat(NODE_DATA(node), 1)) { + int page_lru; + + if (isolate_lru_page(page)) { + put_page(page); + goto out; + } + isolated = 1; + + /* + * Page is isolated which takes a reference count so now the + * callers reference can be safely dropped without the page + * disappearing underneath us during migration + */ + put_page(page); + + page_lru = page_is_file_cache(page); + inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru); + list_add(&page->lru, &migratepages); + } + + if (isolated) { + int nr_remaining; + + nr_remaining = migrate_pages(&migratepages, + alloc_misplaced_dst_page, + node, false, MIGRATE_ASYNC, + MR_NUMA_MISPLACED); + if (nr_remaining) { + putback_lru_pages(&migratepages); + isolated = 0; + } + } + BUG_ON(!list_empty(&migratepages)); +out: + return isolated; +} +#endif /* CONFIG_NUMA_BALANCING */ + +#endif /* CONFIG_NUMA */ -- cgit v1.2.3 From 4daae3b4b9e49b7e0935499a352f1c59d90287d2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Nov 2012 11:33:45 +0000 Subject: mm: mempolicy: Use _PAGE_NUMA to migrate pages Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but sufficiently different that the signed-off-bys were dropped Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page() pieces into an effective migrate on fault scheme. Note that (on x86) we rely on PROT_NONE pages being !present and avoid the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the page-migration performance. Based-on-work-by: Peter Zijlstra Signed-off-by: Mel Gorman --- include/linux/huge_mm.h | 9 +++++---- mm/huge_memory.c | 31 ++++++++++++++++++++++++++++--- mm/memory.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a1d26a98c655..dabb5108d6c0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page) return page; } -extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, - pmd_t pmd, pmd_t *pmdp); +extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd, pmd_t *pmdp); #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) @@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, return 0; } -static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, - pmd_t pmd, pmd_t *pmdp) +static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f5f37630c54d..5723b551c023 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -1019,17 +1020,39 @@ out: } /* NUMA hinting page fault entry point for trans huge pmds */ -int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, - pmd_t pmd, pmd_t *pmdp) +int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t pmd, pmd_t *pmdp) { - struct page *page; + struct page *page = NULL; unsigned long haddr = addr & HPAGE_PMD_MASK; + int target_nid; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); + get_page(page); + spin_unlock(&mm->page_table_lock); + + target_nid = mpol_misplaced(page, vma, haddr); + if (target_nid == -1) + goto clear_pmdnuma; + + /* + * Due to lacking code to migrate thp pages, we'll split + * (which preserves the special PROT_NONE) and re-take the + * fault on the normal pages. + */ + split_huge_page(page); + put_page(page); + return 0; + +clear_pmdnuma: + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) + goto out_unlock; + pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); VM_BUG_ON(pmd_numa(*pmdp)); @@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, out_unlock: spin_unlock(&mm->page_table_lock); + if (page) + put_page(page); return 0; } diff --git a/mm/memory.c b/mm/memory.c index e30616f2cc3d..d52542680e10 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) { - struct page *page; + struct page *page = NULL; spinlock_t *ptl; + int current_nid, target_nid; /* * The "pte" at this point cannot be used safely without @@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, */ ptl = pte_lockptr(mm, pmd); spin_lock(ptl); - if (unlikely(!pte_same(*ptep, pte))) - goto out_unlock; + if (unlikely(!pte_same(*ptep, pte))) { + pte_unmap_unlock(ptep, ptl); + goto out; + } + pte = pte_mknonnuma(pte); set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); @@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } -out_unlock: + get_page(page); + current_nid = page_to_nid(page); + target_nid = mpol_misplaced(page, vma, addr); pte_unmap_unlock(ptep, ptl); + if (target_nid == -1) { + /* + * Account for the fault against the current node if it not + * being replaced regardless of where the page is located. + */ + current_nid = numa_node_id(); + put_page(page); + goto out; + } + + /* Migrate to the requested node */ + if (migrate_misplaced_page(page, target_nid)) + current_nid = target_nid; + +out: return 0; } @@ -3655,7 +3677,7 @@ retry: barrier(); if (pmd_trans_huge(orig_pmd)) { if (pmd_numa(*pmd)) - return do_huge_pmd_numa_page(mm, address, + return do_huge_pmd_numa_page(mm, vma, address, orig_pmd, pmd); if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { -- cgit v1.2.3 From b24f53a0bea38b266d219ee651b22dba727c44ae Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:32 +0200 Subject: mm: mempolicy: Add MPOL_MF_LAZY NOTE: Once again there is a lot of patch stealing and the end result is sufficiently different that I had to drop the signed-offs. Will re-add if the original authors are ok with that. This patch adds another mbind() flag to request "lazy migration". The flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected pages are marked PROT_NONE. The pages will be migrated in the fault path on "first touch", if the policy dictates at that time. "Lazy Migration" will allow testing of migrate-on-fault via mbind(). Also allows applications to specify that only subsequently touched pages be migrated to obey new policy, instead of all pages in range. This can be useful for multi-threaded applications working on a large shared data area that is initialized by an initial thread resulting in all pages on one [or a few, if overflowed] nodes. After PROT_NONE, the pages in regions assigned to the worker threads will be automatically migrated local to the threads on 1st touch. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/mm.h | 5 ++ include/uapi/linux/mempolicy.h | 13 ++- mm/mempolicy.c | 185 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 185 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa1615211159..471185e29bab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) } #endif +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +void change_prot_numa(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 472de8a5d37e..6a1baae3775d 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -49,9 +49,16 @@ enum mpol_rebind_step { /* Flags for mbind */ #define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ -#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform + to policy */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */ +#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */ +#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */ + +#define MPOL_MF_VALID (MPOL_MF_STRICT | \ + MPOL_MF_MOVE | \ + MPOL_MF_MOVE_ALL | \ + MPOL_MF_LAZY) /* * Internal flags that share the struct mempolicy flags word with diff --git a/mm/mempolicy.c b/mm/mempolicy.c index df1466d3d2d8..51d3ebd8561e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma, return 0; } +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +/* + * Here we search for not shared page mappings (mapcount == 1) and we + * set up the pmd/pte_numa on those mappings so the very next access + * will fire a NUMA hinting page fault. + */ +static int +change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte, *_pte; + struct page *page; + unsigned long _address, end; + spinlock_t *ptl; + int ret = 0; + + VM_BUG_ON(address & ~PAGE_MASK); + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + goto out; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + goto out; + + if (pmd_trans_huge_lock(pmd, vma) == 1) { + int page_nid; + ret = HPAGE_PMD_NR; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page = pmd_page(*pmd); + + /* only check non-shared pages */ + if (page_mapcount(page) != 1) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page_nid = page_to_nid(page); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + ret += HPAGE_PMD_NR; + /* defer TLB flush to lower the overhead */ + spin_unlock(&mm->page_table_lock); + goto out; + } + + if (pmd_trans_unstable(pmd)) + goto out; + VM_BUG_ON(!pmd_present(*pmd)); + + end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK); + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + for (_address = address, _pte = pte; _address < end; + _pte++, _address += PAGE_SIZE) { + pte_t pteval = *_pte; + if (!pte_present(pteval)) + continue; + if (pte_numa(pteval)) + continue; + page = vm_normal_page(vma, _address, pteval); + if (unlikely(!page)) + continue; + /* only check non-shared pages */ + if (page_mapcount(page) != 1) + continue; + + set_pte_at(mm, _address, _pte, pte_mknuma(pteval)); + + /* defer TLB flush to lower the overhead */ + ret++; + } + pte_unmap_unlock(pte, ptl); + + if (ret && !pmd_numa(*pmd)) { + spin_lock(&mm->page_table_lock); + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + spin_unlock(&mm->page_table_lock); + /* defer TLB flush to lower the overhead */ + } + +out: + return ret; +} + +/* Assumes mmap_sem is held */ +void +change_prot_numa(struct vm_area_struct *vma, + unsigned long address, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int progress = 0; + + while (address < end) { + VM_BUG_ON(address < vma->vm_start || + address + PAGE_SIZE > vma->vm_end); + + progress += change_prot_numa_range(mm, vma, address); + address = (address + PMD_SIZE) & PMD_MASK; + } + + /* + * Flush the TLB for the mm to start the NUMA hinting + * page faults after we finish scanning this vma part + * if there were any PTE updates + */ + if (progress) { + mmu_notifier_invalidate_range_start(vma->vm_mm, address, end); + flush_tlb_range(vma, address, end); + mmu_notifier_invalidate_range_end(vma->vm_mm, address, end); + } +} +#else +static unsigned long change_prot_numa(struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + return 0; +} +#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ + /* * Check if all pages in a range are on a set of nodes. * If pagelist != NULL then isolate pages from the LRU and @@ -583,22 +723,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { + unsigned long endvma = vma->vm_end; + + if (endvma > end) + endvma = end; + if (vma->vm_start > start) + start = vma->vm_start; + if (!(flags & MPOL_MF_DISCONTIG_OK)) { if (!vma->vm_next && vma->vm_end < end) return ERR_PTR(-EFAULT); if (prev && prev->vm_end < vma->vm_start) return ERR_PTR(-EFAULT); } - if (!is_vm_hugetlb_page(vma) && - ((flags & MPOL_MF_STRICT) || + + if (is_vm_hugetlb_page(vma)) + goto next; + + if (flags & MPOL_MF_LAZY) { + change_prot_numa(vma, start, endvma); + goto next; + } + + if ((flags & MPOL_MF_STRICT) || ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && - vma_migratable(vma)))) { - unsigned long endvma = vma->vm_end; + vma_migratable(vma))) { - if (endvma > end) - endvma = end; - if (vma->vm_start > start) - start = vma->vm_start; err = check_pgd_range(vma, start, endvma, nodes, flags, private); if (err) { @@ -606,6 +756,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, break; } } +next: prev = vma; } return first; @@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len, int err; LIST_HEAD(pagelist); - if (flags & ~(unsigned long)(MPOL_MF_STRICT | - MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; @@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len, if (IS_ERR(new)) return PTR_ERR(new); + if (flags & MPOL_MF_LAZY) + new->flags |= MPOL_F_MOF; + /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all @@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len, vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); - err = PTR_ERR(vma); - if (!IS_ERR(vma)) { - int nr_failed = 0; - + err = PTR_ERR(vma); /* maybe ... */ + if (!IS_ERR(vma) && mode != MPOL_NOOP) err = mbind_range(mm, start, end, new); + if (!err) { + int nr_failed = 0; + if (!list_empty(&pagelist)) { + WARN_ON_ONCE(flags & MPOL_MF_LAZY); nr_failed = migrate_pages(&pagelist, new_vma_page, (unsigned long)vma, false, MIGRATE_SYNC, @@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_lru_pages(&pagelist); } - if (!err && nr_failed && (flags & MPOL_MF_STRICT)) + if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } else putback_lru_pages(&pagelist); -- cgit v1.2.3 From 4b10e7d562c90d0a72f324832c26653947a07381 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Oct 2012 14:16:32 +0200 Subject: mm: mempolicy: Implement change_prot_numa() in terms of change_protection() This patch converts change_prot_numa() to use change_protection(). As pte_numa and friends check the PTE bits directly it is necessary for change_protection() to use pmd_mknuma(). Hence the required modifications to change_protection() are a little clumsy but the end result is that most of the numa page table helpers are just one or two instructions. Signed-off-by: Mel Gorman --- include/linux/huge_mm.h | 3 +- include/linux/mm.h | 4 +- mm/huge_memory.c | 14 ++++- mm/mempolicy.c | 137 +++++------------------------------------------- mm/mprotect.c | 72 +++++++++++++++++++------ 5 files changed, 85 insertions(+), 145 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index dabb5108d6c0..027ad04ef3a8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, pgprot_t newprot); + unsigned long addr, pgprot_t newprot, + int prot_numa); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, diff --git a/include/linux/mm.h b/include/linux/mm.h index 471185e29bab..d04c2f0aab36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long flags, unsigned long new_addr); extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, - int dirty_accountable); + int dirty_accountable, int prot_numa); extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); @@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) #endif #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE -void change_prot_numa(struct vm_area_struct *vma, +unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5723b551c023..d79f7a55bf6f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1147,7 +1147,7 @@ out: } int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, pgprot_t newprot) + unsigned long addr, pgprot_t newprot, int prot_numa) { struct mm_struct *mm = vma->vm_mm; int ret = 0; @@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma) == 1) { pmd_t entry; entry = pmdp_get_and_clear(mm, addr, pmd); - entry = pmd_modify(entry, newprot); + if (!prot_numa) + entry = pmd_modify(entry, newprot); + else { + struct page *page = pmd_page(*pmd); + + /* only check non-shared pages */ + if (page_mapcount(page) == 1 && + !pmd_numa(*pmd)) { + entry = pmd_mknuma(entry); + } + } set_pmd_at(mm, addr, pmd, entry); spin_unlock(&vma->vm_mm->page_table_lock); ret = 1; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 51d3ebd8561e..75d4600a5e92 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma, #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE /* - * Here we search for not shared page mappings (mapcount == 1) and we - * set up the pmd/pte_numa on those mappings so the very next access - * will fire a NUMA hinting page fault. + * This is used to mark a range of virtual addresses to be inaccessible. + * These are later cleared by a NUMA hinting fault. Depending on these + * faults, pages may be migrated for better NUMA placement. + * + * This is assuming that NUMA faults are handled using PROT_NONE. If + * an architecture makes a different choice, it will need further + * changes to the core. */ -static int -change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte, *_pte; - struct page *page; - unsigned long _address, end; - spinlock_t *ptl; - int ret = 0; - - VM_BUG_ON(address & ~PAGE_MASK); - - pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) - goto out; - - pud = pud_offset(pgd, address); - if (!pud_present(*pud)) - goto out; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - goto out; - - if (pmd_trans_huge_lock(pmd, vma) == 1) { - int page_nid; - ret = HPAGE_PMD_NR; - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - if (pmd_numa(*pmd)) { - spin_unlock(&mm->page_table_lock); - goto out; - } - - page = pmd_page(*pmd); - - /* only check non-shared pages */ - if (page_mapcount(page) != 1) { - spin_unlock(&mm->page_table_lock); - goto out; - } - - page_nid = page_to_nid(page); - - if (pmd_numa(*pmd)) { - spin_unlock(&mm->page_table_lock); - goto out; - } - - set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); - ret += HPAGE_PMD_NR; - /* defer TLB flush to lower the overhead */ - spin_unlock(&mm->page_table_lock); - goto out; - } - - if (pmd_trans_unstable(pmd)) - goto out; - VM_BUG_ON(!pmd_present(*pmd)); - - end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK); - pte = pte_offset_map_lock(mm, pmd, address, &ptl); - for (_address = address, _pte = pte; _address < end; - _pte++, _address += PAGE_SIZE) { - pte_t pteval = *_pte; - if (!pte_present(pteval)) - continue; - if (pte_numa(pteval)) - continue; - page = vm_normal_page(vma, _address, pteval); - if (unlikely(!page)) - continue; - /* only check non-shared pages */ - if (page_mapcount(page) != 1) - continue; - - set_pte_at(mm, _address, _pte, pte_mknuma(pteval)); - - /* defer TLB flush to lower the overhead */ - ret++; - } - pte_unmap_unlock(pte, ptl); - - if (ret && !pmd_numa(*pmd)) { - spin_lock(&mm->page_table_lock); - set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); - spin_unlock(&mm->page_table_lock); - /* defer TLB flush to lower the overhead */ - } - -out: - return ret; -} - -/* Assumes mmap_sem is held */ -void -change_prot_numa(struct vm_area_struct *vma, - unsigned long address, unsigned long end) +unsigned long change_prot_numa(struct vm_area_struct *vma, + unsigned long addr, unsigned long end) { - struct mm_struct *mm = vma->vm_mm; - int progress = 0; - - while (address < end) { - VM_BUG_ON(address < vma->vm_start || - address + PAGE_SIZE > vma->vm_end); + int nr_updated; + BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE); - progress += change_prot_numa_range(mm, vma, address); - address = (address + PMD_SIZE) & PMD_MASK; - } + nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1); - /* - * Flush the TLB for the mm to start the NUMA hinting - * page faults after we finish scanning this vma part - * if there were any PTE updates - */ - if (progress) { - mmu_notifier_invalidate_range_start(vma->vm_mm, address, end); - flush_tlb_range(vma, address, end); - mmu_notifier_invalidate_range_end(vma->vm_mm, address, end); - } + return nr_updated; } #else static unsigned long change_prot_numa(struct vm_area_struct *vma, diff --git a/mm/mprotect.c b/mm/mprotect.c index 7c3628a8b486..7ef6ae964e8f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) } #endif -static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, +static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable) + int dirty_accountable, int prot_numa) { + struct mm_struct *mm = vma->vm_mm; pte_t *pte, oldpte; spinlock_t *ptl; unsigned long pages = 0; @@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; + bool updated = false; ptent = ptep_modify_prot_start(mm, addr, pte); - ptent = pte_modify(ptent, newprot); + if (!prot_numa) { + ptent = pte_modify(ptent, newprot); + updated = true; + } else { + struct page *page; + + page = vm_normal_page(vma, addr, oldpte); + if (page) { + /* only check non-shared pages */ + if (!pte_numa(oldpte) && + page_mapcount(page) == 1) { + ptent = pte_mknuma(ptent); + updated = true; + } + } + } /* * Avoid taking write faults for pages we know to be * dirty. */ - if (dirty_accountable && pte_dirty(ptent)) + if (dirty_accountable && pte_dirty(ptent)) { ptent = pte_mkwrite(ptent); + updated = true; + } + + if (updated) + pages++; ptep_modify_prot_commit(mm, addr, pte, ptent); - pages++; } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd, return pages; } +#ifdef CONFIG_NUMA_BALANCING +static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr, + pmd_t *pmd) +{ + spin_lock(&mm->page_table_lock); + set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd)); + spin_unlock(&mm->page_table_lock); +} +#else +static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr, + pmd_t *pmd) +{ + BUG(); +} +#endif /* CONFIG_NUMA_BALANCING */ + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable) + int dirty_accountable, int prot_numa) { pmd_t *pmd; unsigned long next; @@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * if (pmd_trans_huge(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) split_huge_page_pmd(vma->vm_mm, pmd); - else if (change_huge_pmd(vma, pmd, addr, newprot)) { + else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) { pages += HPAGE_PMD_NR; continue; } @@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * } if (pmd_none_or_clear_bad(pmd)) continue; - pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot, - dirty_accountable); + pages += change_pte_range(vma, pmd, addr, next, newprot, + dirty_accountable, prot_numa); + + if (prot_numa) + change_pmd_protnuma(vma->vm_mm, addr, pmd); } while (pmd++, addr = next, addr != end); return pages; @@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable) + int dirty_accountable, int prot_numa) { pud_t *pud; unsigned long next; @@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t * if (pud_none_or_clear_bad(pud)) continue; pages += change_pmd_range(vma, pud, addr, next, newprot, - dirty_accountable); + dirty_accountable, prot_numa); } while (pud++, addr = next, addr != end); return pages; @@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t * static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable) + int dirty_accountable, int prot_numa) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; @@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, if (pgd_none_or_clear_bad(pgd)) continue; pages += change_pud_range(vma, pgd, addr, next, newprot, - dirty_accountable); + dirty_accountable, prot_numa); } while (pgd++, addr = next, addr != end); /* Only flush the TLB if we actually modified any entries: */ @@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, - int dirty_accountable) + int dirty_accountable, int prot_numa) { struct mm_struct *mm = vma->vm_mm; unsigned long pages; @@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, if (is_vm_hugetlb_page(vma)) pages = hugetlb_change_protection(vma, start, end, newprot); else - pages = change_protection_range(vma, start, end, newprot, dirty_accountable); + pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); mmu_notifier_invalidate_range_end(mm, start, end); return pages; @@ -249,7 +289,7 @@ success: dirty_accountable = 1; } - change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); -- cgit v1.2.3 From cbee9f88ec1b8dd6b58f25f54e4f52c82ed77690 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:43 +0200 Subject: mm: numa: Add fault driven placement and migration NOTE: This patch is based on "sched, numa, mm: Add fault driven placement and migration policy" but as it throws away all the policy to just leave a basic foundation I had to drop the signed-offs-by. This patch creates a bare-bones method for setting PTEs pte_numa in the context of the scheduler that when faulted later will be faulted onto the node the CPU is running on. In itself this does nothing useful but any placement policy will fundamentally depend on receiving hints on placement from fault context and doing something intelligent about it. Signed-off-by: Mel Gorman Acked-by: Rik van Riel --- arch/sh/mm/Kconfig | 1 + arch/x86/Kconfig | 2 + include/linux/mm_types.h | 11 +++++ include/linux/sched.h | 20 ++++++++ kernel/sched/core.c | 13 +++++ kernel/sched/fair.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/features.h | 7 +++ kernel/sched/sched.h | 6 +++ kernel/sysctl.c | 24 ++++++++- mm/huge_memory.c | 5 +- mm/memory.c | 14 +++++- 11 files changed, 224 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index cb8f9920f4dd..0f7c852f355c 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -111,6 +111,7 @@ config VSYSCALL config NUMA bool "Non Uniform Memory Access (NUMA) Support" depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL + select ARCH_WANT_NUMA_VARIABLE_LOCALITY default n help Some SH systems have many various memories scattered around diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff3ced2..1137028fc6d9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,8 @@ config X86 def_bool y select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_WANTS_PROT_NUMA_PROT_NONE select HAVE_IDE select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 31f8a3af7d94..ed8638c29b3e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -397,6 +397,17 @@ struct mm_struct { #endif #ifdef CONFIG_CPUMASK_OFFSTACK struct cpumask cpumask_allocation; +#endif +#ifdef CONFIG_NUMA_BALANCING + /* + * numa_next_scan is the next time when the PTEs will me marked + * pte_numa to gather statistics and migrate pages to new nodes + * if necessary + */ + unsigned long numa_next_scan; + + /* numa_scan_seq prevents two threads setting pte_numa */ + int numa_scan_seq; #endif struct uprobes_state uprobes_state; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 0dd42a02df2e..844af5b12cb2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1479,6 +1479,14 @@ struct task_struct { short il_next; short pref_node_fork; #endif +#ifdef CONFIG_NUMA_BALANCING + int numa_scan_seq; + int numa_migrate_seq; + unsigned int numa_scan_period; + u64 node_stamp; /* migration stamp */ + struct callback_head numa_work; +#endif /* CONFIG_NUMA_BALANCING */ + struct rcu_head rcu; /* @@ -1553,6 +1561,14 @@ struct task_struct { /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int node, int pages); +#else +static inline void task_numa_fault(int node, int pages) +{ +} +#endif + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH @@ -1990,6 +2006,10 @@ enum sched_tunable_scaling { }; extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; +extern unsigned int sysctl_numa_balancing_scan_period_min; +extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_settle_count; + #ifdef CONFIG_SCHED_DEBUG extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d8927fda712..cad0d092ce3b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1533,6 +1533,19 @@ static void __sched_fork(struct task_struct *p) #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); #endif + +#ifdef CONFIG_NUMA_BALANCING + if (p->mm && atomic_read(&p->mm->mm_users) == 1) { + p->mm->numa_next_scan = jiffies; + p->mm->numa_scan_seq = 0; + } + + p->node_stamp = 0ULL; + p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; + p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0; + p->numa_scan_period = sysctl_numa_balancing_scan_period_min; + p->numa_work.next = &p->numa_work; +#endif /* CONFIG_NUMA_BALANCING */ } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b800a14b990..6831abb5dbef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include @@ -776,6 +778,126 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) * Scheduling class queueing methods: */ +#ifdef CONFIG_NUMA_BALANCING +/* + * numa task sample period in ms: 5s + */ +unsigned int sysctl_numa_balancing_scan_period_min = 5000; +unsigned int sysctl_numa_balancing_scan_period_max = 5000*16; + +static void task_numa_placement(struct task_struct *p) +{ + int seq = ACCESS_ONCE(p->mm->numa_scan_seq); + + if (p->numa_scan_seq == seq) + return; + p->numa_scan_seq = seq; + + /* FIXME: Scheduling placement policy hints go here */ +} + +/* + * Got a PROT_NONE fault for a page on @node. + */ +void task_numa_fault(int node, int pages) +{ + struct task_struct *p = current; + + /* FIXME: Allocate task-specific structure for placement policy here */ + + task_numa_placement(p); +} + +/* + * The expensive part of numa migration is done from task_work context. + * Triggered from task_tick_numa(). + */ +void task_numa_work(struct callback_head *work) +{ + unsigned long migrate, next_scan, now = jiffies; + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); + + work->next = work; /* protect against double add */ + /* + * Who cares about NUMA placement when they're dying. + * + * NOTE: make sure not to dereference p->mm before this check, + * exit_task_work() happens _after_ exit_mm() so we could be called + * without p->mm even though we still had it when we enqueued this + * work. + */ + if (p->flags & PF_EXITING) + return; + + /* + * Enforce maximal scan/migration frequency.. + */ + migrate = mm->numa_next_scan; + if (time_before(now, migrate)) + return; + + if (p->numa_scan_period == 0) + p->numa_scan_period = sysctl_numa_balancing_scan_period_min; + + next_scan = now + 2*msecs_to_jiffies(p->numa_scan_period); + if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) + return; + + ACCESS_ONCE(mm->numa_scan_seq)++; + { + struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!vma_migratable(vma)) + continue; + change_prot_numa(vma, vma->vm_start, vma->vm_end); + } + up_read(&mm->mmap_sem); + } +} + +/* + * Drive the periodic memory faults.. + */ +void task_tick_numa(struct rq *rq, struct task_struct *curr) +{ + struct callback_head *work = &curr->numa_work; + u64 period, now; + + /* + * We don't care about NUMA placement if we don't have memory. + */ + if (!curr->mm || (curr->flags & PF_EXITING) || work->next != work) + return; + + /* + * Using runtime rather than walltime has the dual advantage that + * we (mostly) drive the selection from busy threads and that the + * task needs to have done some actual work before we bother with + * NUMA placement. + */ + now = curr->se.sum_exec_runtime; + period = (u64)curr->numa_scan_period * NSEC_PER_MSEC; + + if (now - curr->node_stamp > period) { + curr->node_stamp = now; + + if (!time_before(jiffies, curr->mm->numa_next_scan)) { + init_task_work(work, task_numa_work); /* TODO: move this into sched_fork() */ + task_work_add(curr, work, true); + } + } +} +#else +static void task_tick_numa(struct rq *rq, struct task_struct *curr) +{ +} +#endif /* CONFIG_NUMA_BALANCING */ + static void account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -4954,6 +5076,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) cfs_rq = cfs_rq_of(se); entity_tick(cfs_rq, se, queued); } + + if (sched_feat_numa(NUMA)) + task_tick_numa(rq, curr); } /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index eebefcad7027..5fb7aefbec80 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -61,3 +61,10 @@ SCHED_FEAT(TTWU_QUEUE, true) SCHED_FEAT(FORCE_SD_OVERLAP, false) SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) + +/* + * Apply the automatic NUMA scheduling policy + */ +#ifdef CONFIG_NUMA_BALANCING +SCHED_FEAT(NUMA, true) +#endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7a7db09cfabc..ae31c051ff2f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -648,6 +648,12 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */ +#ifdef CONFIG_NUMA_BALANCING +#define sched_feat_numa(x) sched_feat(x) +#else +#define sched_feat_numa(x) (0) +#endif + static inline u64 global_rt_period(void) { return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 26f65eaa01f9..025e1ae50ef1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -256,9 +256,11 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */ static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_wakeup_granularity_ns; /* 0 usecs */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ +#ifdef CONFIG_SMP static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -#endif +#endif /* CONFIG_SMP */ +#endif /* CONFIG_SCHED_DEBUG */ #ifdef CONFIG_COMPACTION static int min_extfrag_threshold; @@ -301,6 +303,7 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, +#ifdef CONFIG_SMP { .procname = "sched_tunable_scaling", .data = &sysctl_sched_tunable_scaling, @@ -347,7 +350,24 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, -#endif +#endif /* CONFIG_SMP */ +#ifdef CONFIG_NUMA_BALANCING + { + .procname = "numa_balancing_scan_period_min_ms", + .data = &sysctl_numa_balancing_scan_period_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "numa_balancing_scan_period_max_ms", + .data = &sysctl_numa_balancing_scan_period_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d79f7a55bf6f..ee8133794a56 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1046,6 +1046,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, */ split_huge_page(page); put_page(page); + return 0; clear_pmdnuma: @@ -1060,8 +1061,10 @@ clear_pmdnuma: out_unlock: spin_unlock(&mm->page_table_lock); - if (page) + if (page) { put_page(page); + task_numa_fault(numa_node_id(), HPAGE_PMD_NR); + } return 0; } diff --git a/mm/memory.c b/mm/memory.c index d52542680e10..8012c1907895 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3454,7 +3454,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page = NULL; spinlock_t *ptl; - int current_nid, target_nid; + int current_nid = -1; + int target_nid; /* * The "pte" at this point cannot be used safely without @@ -3501,6 +3502,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, current_nid = target_nid; out: + task_numa_fault(current_nid, 1); return 0; } @@ -3537,6 +3539,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t pteval = *pte; struct page *page; + int curr_nid; if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3554,6 +3557,15 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, page = vm_normal_page(vma, addr, pteval); if (unlikely(!page)) continue; + /* only check non-shared pages */ + if (unlikely(page_mapcount(page) != 1)) + continue; + pte_unmap_unlock(pte, ptl); + + curr_nid = page_to_nid(page); + task_numa_fault(curr_nid, 1); + + pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } pte_unmap_unlock(orig_pte, ptl); -- cgit v1.2.3 From 6e5fb223e89dbe5cb5c563f8d4a4a0a7d62455a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:45 +0200 Subject: mm: sched: numa: Implement constant, per task Working Set Sampling (WSS) rate Previously, to probe the working set of a task, we'd use a very simple and crude method: mark all of its address space PROT_NONE. That method has various (obvious) disadvantages: - it samples the working set at dissimilar rates, giving some tasks a sampling quality advantage over others. - creates performance problems for tasks with very large working sets - over-samples processes with large address spaces but which only very rarely execute Improve that method by keeping a rotating offset into the address space that marks the current position of the scan, and advance it by a constant rate (in a CPU cycles execution proportional manner). If the offset reaches the last mapped address of the mm then it then it starts over at the first address. The per-task nature of the working set sampling functionality in this tree allows such constant rate, per task, execution-weight proportional sampling of the working set, with an adaptive sampling interval/frequency that goes from once per 100ms up to just once per 8 seconds. The current sampling volume is 256 MB per interval. As tasks mature and converge their working set, so does the sampling rate slow down to just a trickle, 256 MB per 8 seconds of CPU time executed. This, beyond being adaptive, also rate-limits rarely executing systems and does not over-sample on overloaded systems. [ In AutoNUMA speak, this patch deals with the effective sampling rate of the 'hinting page fault'. AutoNUMA's scanning is currently rate-limited, but it is also fundamentally single-threaded, executing in the knuma_scand kernel thread, so the limit in AutoNUMA is global and does not scale up with the number of CPUs, nor does it scan tasks in an execution proportional manner. So the idea of rate-limiting the scanning was first implemented in the AutoNUMA tree via a global rate limit. This patch goes beyond that by implementing an execution rate proportional working set sampling rate that is not implemented via a single global scanning daemon. ] [ Dan Carpenter pointed out a possible NULL pointer dereference in the first version of this patch. ] Based-on-idea-by: Andrea Arcangeli Bug-Found-By: Dan Carpenter Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Rik van Riel [ Wrote changelog and fixed bug. ] Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/mm_types.h | 3 +++ include/linux/sched.h | 1 + kernel/sched/fair.c | 65 ++++++++++++++++++++++++++++++++++++++---------- kernel/sysctl.c | 7 ++++++ 4 files changed, 63 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ed8638c29b3e..d1e246c5e50c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -406,6 +406,9 @@ struct mm_struct { */ unsigned long numa_next_scan; + /* Restart point for scanning and setting pte_numa */ + unsigned long numa_scan_offset; + /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 844af5b12cb2..37841958d234 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2008,6 +2008,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_numa_balancing_settle_count; #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6831abb5dbef..0a349dd1fa60 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) #ifdef CONFIG_NUMA_BALANCING /* - * numa task sample period in ms: 5s + * numa task sample period in ms */ -unsigned int sysctl_numa_balancing_scan_period_min = 5000; -unsigned int sysctl_numa_balancing_scan_period_max = 5000*16; +unsigned int sysctl_numa_balancing_scan_period_min = 100; +unsigned int sysctl_numa_balancing_scan_period_max = 100*16; + +/* Portion of address space to scan in MB */ +unsigned int sysctl_numa_balancing_scan_size = 256; static void task_numa_placement(struct task_struct *p) { @@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages) task_numa_placement(p); } +static void reset_ptenuma_scan(struct task_struct *p) +{ + ACCESS_ONCE(p->mm->numa_scan_seq)++; + p->mm->numa_scan_offset = 0; +} + /* * The expensive part of numa migration is done from task_work context. * Triggered from task_tick_numa(). @@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work) unsigned long migrate, next_scan, now = jiffies; struct task_struct *p = current; struct mm_struct *mm = p->mm; + struct vm_area_struct *vma; + unsigned long offset, end; + long length; WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); @@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work) if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) return; - ACCESS_ONCE(mm->numa_scan_seq)++; - { - struct vm_area_struct *vma; + offset = mm->numa_scan_offset; + length = sysctl_numa_balancing_scan_size; + length <<= 20; - down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (!vma_migratable(vma)) - continue; - change_prot_numa(vma, vma->vm_start, vma->vm_end); - } - up_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + vma = find_vma(mm, offset); + if (!vma) { + reset_ptenuma_scan(p); + offset = 0; + vma = mm->mmap; + } + for (; vma && length > 0; vma = vma->vm_next) { + if (!vma_migratable(vma)) + continue; + + /* Skip small VMAs. They are not likely to be of relevance */ + if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR) + continue; + + offset = max(offset, vma->vm_start); + end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end); + length -= end - offset; + + change_prot_numa(vma, offset, end); + + offset = end; } + + /* + * It is possible to reach the end of the VMA list but the last few VMAs are + * not guaranteed to the vma_migratable. If they are not, we would find the + * !migratable VMA on the next scan but not reset the scanner to the start + * so check it now. + */ + if (vma) + mm->numa_scan_offset = offset; + else + reset_ptenuma_scan(p); + up_read(&mm->mmap_sem); } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 025e1ae50ef1..7d3a2e0475e5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "numa_balancing_scan_size_mb", + .data = &sysctl_numa_balancing_scan_size, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { -- cgit v1.2.3 From 4b96a29ba891dd59734cb7be80a900fe93aa2d9f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:47 +0200 Subject: mm: sched: numa: Implement slow start for working set sampling Add a 1 second delay before starting to scan the working set of a task and starting to balance it amongst nodes. [ note that before the constant per task WSS sampling rate patch the initial scan would happen much later still, in effect that patch caused this regression. ] The theory is that short-run tasks benefit very little from NUMA placement: they come and go, and they better stick to the node they were started on. As tasks mature and rebalance to other CPUs and nodes, so does their NUMA placement have to change and so does it start to matter more and more. In practice this change fixes an observable kbuild regression: # [ a perf stat --null --repeat 10 test of ten bzImage builds to /dev/shm ] !NUMA: 45.291088843 seconds time elapsed ( +- 0.40% ) 45.154231752 seconds time elapsed ( +- 0.36% ) +NUMA, no slow start: 46.172308123 seconds time elapsed ( +- 0.30% ) 46.343168745 seconds time elapsed ( +- 0.25% ) +NUMA, 1 sec slow start: 45.224189155 seconds time elapsed ( +- 0.25% ) 45.160866532 seconds time elapsed ( +- 0.17% ) and it also fixes an observable perf bench (hackbench) regression: # perf stat --null --repeat 10 perf bench sched messaging -NUMA: -NUMA: 0.246225691 seconds time elapsed ( +- 1.31% ) +NUMA no slow start: 0.252620063 seconds time elapsed ( +- 1.13% ) +NUMA 1sec delay: 0.248076230 seconds time elapsed ( +- 1.35% ) The implementation is simple and straightforward, most of the patch deals with adding the /proc/sys/kernel/numa_balancing_scan_delay_ms tunable knob. Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Rik van Riel [ Wrote the changelog, ran measurements, tuned the default. ] Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/sched.h | 1 + kernel/sched/core.c | 2 +- kernel/sched/fair.c | 5 +++++ kernel/sysctl.c | 7 +++++++ 4 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 37841958d234..7d95a232b5b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2006,6 +2006,7 @@ enum sched_tunable_scaling { }; extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; +extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cad0d092ce3b..fbfc4843063f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1543,7 +1543,7 @@ static void __sched_fork(struct task_struct *p) p->node_stamp = 0ULL; p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0; - p->numa_scan_period = sysctl_numa_balancing_scan_period_min; + p->numa_scan_period = sysctl_numa_balancing_scan_delay; p->numa_work.next = &p->numa_work; #endif /* CONFIG_NUMA_BALANCING */ } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f6e1f25ed2bd..7727b0161579 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -788,6 +788,9 @@ unsigned int sysctl_numa_balancing_scan_period_max = 100*16; /* Portion of address space to scan in MB */ unsigned int sysctl_numa_balancing_scan_size = 256; +/* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */ +unsigned int sysctl_numa_balancing_scan_delay = 1000; + static void task_numa_placement(struct task_struct *p) { int seq = ACCESS_ONCE(p->mm->numa_scan_seq); @@ -929,6 +932,8 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr) period = (u64)curr->numa_scan_period * NSEC_PER_MSEC; if (now - curr->node_stamp > period) { + if (!curr->node_stamp) + curr->numa_scan_period = sysctl_numa_balancing_scan_period_min; curr->node_stamp = now; if (!time_before(jiffies, curr->mm->numa_next_scan)) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7d3a2e0475e5..48a68cc258c1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -352,6 +352,13 @@ static struct ctl_table kern_table[] = { }, #endif /* CONFIG_SMP */ #ifdef CONFIG_NUMA_BALANCING + { + .procname = "numa_balancing_scan_delay_ms", + .data = &sysctl_numa_balancing_scan_delay, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "numa_balancing_scan_period_min_ms", .data = &sysctl_numa_balancing_scan_period_min, -- cgit v1.2.3 From 03c5a6e16322c997bf8f264851bfa3f532ad515f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Nov 2012 14:52:48 +0000 Subject: mm: numa: Add pte updates, hinting and migration stats It is tricky to quantify the basic cost of automatic NUMA placement in a meaningful manner. This patch adds some vmstats that can be used as part of a basic costing model. u = basic unit = sizeof(void *) Ca = cost of struct page access = sizeof(struct page) / u Cpte = Cost PTE access = Ca Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock) where Cpte is incurred twice for a read and a write and Wlock is a constant representing the cost of taking or releasing a lock Cnumahint = Cost of a minor page fault = some high constant e.g. 1000 Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u Ci = Cost of page isolation = Ca + Wi where Wi is a constant that should reflect the approximate cost of the locking operation Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma) where Wnuma is the approximate NUMA factor. 1 is local. 1.2 would imply that remote accesses are 20% more expensive Balancing cost = Cpte * numa_pte_updates + Cnumahint * numa_hint_faults + Ci * numa_pages_migrated + Cpagecopy * numa_pages_migrated Note that numa_pages_migrated is used as a measure of how many pages were isolated even though it would miss pages that failed to migrate. A vmstat counter could have been added for it but the isolation cost is pretty marginal in comparison to the overall cost so it seemed overkill. The ideal way to measure automatic placement benefit would be to count the number of remote accesses versus local accesses and do something like benefit = (remote_accesses_before - remove_access_after) * Wnuma but the information is not readily available. As a workload converges, the expection would be that the number of remote numa hints would reduce to 0. convergence = numa_hint_faults_local / numa_hint_faults where this is measured for the last N number of numa hints recorded. When the workload is fully converged the value is 1. This can measure if the placement policy is converging and how fast it is doing it. Signed-off-by: Mel Gorman Acked-by: Rik van Riel --- include/linux/vm_event_item.h | 6 ++++++ include/linux/vmstat.h | 8 ++++++++ mm/huge_memory.c | 5 +++++ mm/memory.c | 12 ++++++++++++ mm/mempolicy.c | 2 ++ mm/migrate.c | 3 ++- mm/vmstat.c | 6 ++++++ 7 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a1f750b8e72a..55600049e794 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -38,6 +38,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, KSWAPD_SKIP_CONGESTION_WAIT, PAGEOUTRUN, ALLOCSTALL, PGROTATED, +#ifdef CONFIG_NUMA_BALANCING + NUMA_PTE_UPDATES, + NUMA_HINT_FAULTS, + NUMA_HINT_FAULTS_LOCAL, + NUMA_PAGE_MIGRATE, +#endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 92a86b2cce33..a13291f7da88 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu) #endif /* CONFIG_VM_EVENT_COUNTERS */ +#ifdef CONFIG_NUMA_BALANCING +#define count_vm_numa_event(x) count_vm_event(x) +#define count_vm_numa_events(x, y) count_vm_events(x, y) +#else +#define count_vm_numa_event(x) do {} while (0) +#define count_vm_numa_events(x, y) do {} while (0) +#endif /* CONFIG_NUMA_BALANCING */ + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ee8133794a56..f3a477fffd09 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1026,6 +1026,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page = NULL; unsigned long haddr = addr & HPAGE_PMD_MASK; int target_nid; + int current_nid = -1; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) @@ -1034,6 +1035,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, page = pmd_page(pmd); get_page(page); spin_unlock(&mm->page_table_lock); + current_nid = page_to_nid(page); + count_vm_numa_event(NUMA_HINT_FAULTS); + if (current_nid == numa_node_id()) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); target_nid = mpol_misplaced(page, vma, haddr); if (target_nid == -1) diff --git a/mm/memory.c b/mm/memory.c index 8012c1907895..8a7b4ccbe136 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); + count_vm_numa_event(NUMA_HINT_FAULTS); page = vm_normal_page(vma, addr, pte); if (!page) { pte_unmap_unlock(ptep, ptl); @@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, get_page(page); current_nid = page_to_nid(page); + if (current_nid == numa_node_id()) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); target_nid = mpol_misplaced(page, vma, addr); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { @@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; + int local_nid = numa_node_id(); + unsigned long nr_faults = 0; + unsigned long nr_faults_local = 0; spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, curr_nid = page_to_nid(page); task_numa_fault(curr_nid, 1); + nr_faults++; + if (curr_nid == local_nid) + nr_faults_local++; + pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } pte_unmap_unlock(orig_pte, ptl); + count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults); + count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local); return 0; } #else diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a7a62fe7c280..516491fbfaa8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -583,6 +583,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE); nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1); + if (nr_updated) + count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); return nr_updated; } diff --git a/mm/migrate.c b/mm/migrate.c index c7d550011a64..23bba5d6edff 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1514,7 +1514,8 @@ int migrate_misplaced_page(struct page *page, int node) if (nr_remaining) { putback_lru_pages(&migratepages); isolated = 0; - } + } else + count_vm_numa_event(NUMA_PAGE_MIGRATE); } BUG_ON(!list_empty(&migratepages)); out: diff --git a/mm/vmstat.c b/mm/vmstat.c index 3a067fabe190..c0f1f6db5182 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -774,6 +774,12 @@ const char * const vmstat_text[] = { "pgrotated", +#ifdef CONFIG_NUMA_BALANCING + "numa_pte_updates", + "numa_hint_faults", + "numa_hint_faults_local", + "numa_pages_migrated", +#endif #ifdef CONFIG_MIGRATION "pgmigrate_success", "pgmigrate_fail", -- cgit v1.2.3 From 8177a420ed7c16c171ed3c3aec5b0676db38c247 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 23 Mar 2012 20:56:34 +0100 Subject: mm: numa: Structures for Migrate On Fault per NUMA migration rate limiting This defines the per-node data used by Migrate On Fault in order to rate limit the migration. The rate limiting is applied independently to each destination node. Signed-off-by: Andrea Arcangeli Signed-off-by: Mel Gorman --- include/linux/mmzone.h | 13 +++++++++++++ mm/page_alloc.c | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a23923ba8263..28601fdfcdb2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -717,6 +717,19 @@ typedef struct pglist_data { struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ int kswapd_max_order; enum zone_type classzone_idx; +#ifdef CONFIG_NUMA_BALANCING + /* + * Lock serializing the per destination node AutoNUMA memory + * migration rate limiting data. + */ + spinlock_t numabalancing_migrate_lock; + + /* Rate limiting time interval */ + unsigned long numabalancing_migrate_next_window; + + /* Number of pages migrated during the rate limiting time interval */ + unsigned long numabalancing_migrate_nr_pages; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5953dc2d196f..ef025e20dbee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4449,6 +4449,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, int ret; pgdat_resize_init(pgdat); +#ifdef CONFIG_NUMA_BALANCING + spin_lock_init(&pgdat->numabalancing_migrate_lock); + pgdat->numabalancing_migrate_nr_pages = 0; + pgdat->numabalancing_migrate_next_window = jiffies; +#endif init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); pgdat_page_cgroup_init(pgdat); -- cgit v1.2.3 From e14808b49f55e0e1135da5e4a154a540dd9f3662 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 19 Nov 2012 10:59:15 +0000 Subject: mm: numa: Rate limit setting of pte_numa if node is saturated If there are a large number of NUMA hinting faults and all of them are resulting in migrations it may indicate that memory is just bouncing uselessly around. NUMA balancing cost is likely exceeding any benefit from locality. Rate limit the PTE updates if the node is migration rate-limited. As noted in the comments, this distorts the NUMA faulting statistics. Signed-off-by: Mel Gorman --- include/linux/migrate.h | 6 ++++++ kernel/sched/fair.c | 9 +++++++++ mm/migrate.c | 20 ++++++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index f0d0313eea6f..91556889adac 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -77,11 +77,17 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #ifdef CONFIG_NUMA_BALANCING extern int migrate_misplaced_page(struct page *page, int node); +extern int migrate_misplaced_page(struct page *page, int node); +extern bool migrate_ratelimited(int node); #else static inline int migrate_misplaced_page(struct page *page, int node) { return -EAGAIN; /* can't migrate now */ } +static inline bool migrate_ratelimited(int node) +{ + return false; +} #endif /* CONFIG_NUMA_BALANCING */ #endif /* _LINUX_MIGRATE_H */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7727b0161579..37e895a941ab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -861,6 +862,14 @@ void task_numa_work(struct callback_head *work) if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) return; + /* + * Do not set pte_numa if the current running node is rate-limited. + * This loses statistics on the fault but if we are unwilling to + * migrate to this node, it is less likely we can do useful work + */ + if (migrate_ratelimited(numa_node_id())) + return; + start = mm->numa_scan_offset; pages = sysctl_numa_balancing_scan_size; pages <<= 20 - PAGE_SHIFT; /* MB in pages */ diff --git a/mm/migrate.c b/mm/migrate.c index 4b8267f1842f..32a1afca6009 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1464,10 +1464,30 @@ static struct page *alloc_misplaced_dst_page(struct page *page, * page migration rate limiting control. * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs * window of time. Default here says do not migrate more than 1280M per second. + * If a node is rate-limited then PTE NUMA updates are also rate-limited. However + * as it is faults that reset the window, pte updates will happen unconditionally + * if there has not been a fault since @pteupdate_interval_millisecs after the + * throttle window closed. */ static unsigned int migrate_interval_millisecs __read_mostly = 100; +static unsigned int pteupdate_interval_millisecs __read_mostly = 1000; static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); +/* Returns true if NUMA migration is currently rate limited */ +bool migrate_ratelimited(int node) +{ + pg_data_t *pgdat = NODE_DATA(node); + + if (time_after(jiffies, pgdat->numabalancing_migrate_next_window + + msecs_to_jiffies(pteupdate_interval_millisecs))) + return false; + + if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages) + return false; + + return true; +} + /* * Attempt to migrate a misplaced page to the specified destination * node. Caller is expected to have an elevated reference count on -- cgit v1.2.3 From 57e0a0309160b1b4ebde9f3c6a867cd96ac368bf Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 12 Nov 2012 09:06:20 +0000 Subject: mm: numa: Introduce last_nid to the page frame This patch introduces a last_nid field to the page struct. This is used to build a two-stage filter in the next patch that is aimed at mitigating a problem whereby pages migrate to the wrong node when referenced by a process that was running off its home node. Signed-off-by: Mel Gorman --- include/linux/mm.h | 30 ++++++++++++++++++++++++++++++ include/linux/mm_types.h | 4 ++++ mm/page_alloc.c | 2 ++ 3 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d04c2f0aab36..d87f9ec4a145 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page) } #endif +#ifdef CONFIG_NUMA_BALANCING +static inline int page_xchg_last_nid(struct page *page, int nid) +{ + return xchg(&page->_last_nid, nid); +} + +static inline int page_last_nid(struct page *page) +{ + return page->_last_nid; +} +static inline void reset_page_last_nid(struct page *page) +{ + page->_last_nid = -1; +} +#else +static inline int page_xchg_last_nid(struct page *page, int nid) +{ + return page_to_nid(page); +} + +static inline int page_last_nid(struct page *page) +{ + return page_to_nid(page); +} + +static inline void reset_page_last_nid(struct page *page) +{ +} +#endif + static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d1e246c5e50c..c5fffa239861 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -175,6 +175,10 @@ struct page { */ void *shadow; #endif + +#ifdef CONFIG_NUMA_BALANCING + int _last_nid; +#endif } /* * The struct page can be forced to be double word aligned so that atomic ops diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef025e20dbee..73f226a1206e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -608,6 +608,7 @@ static inline int free_pages_check(struct page *page) bad_page(page); return 1; } + reset_page_last_nid(page); if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; return 0; @@ -3826,6 +3827,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, mminit_verify_page_links(page, zone, nid, pfn); init_page_count(page); reset_page_mapcount(page); + reset_page_last_nid(page); SetPageReserved(page); /* * Mark the block movable so that blocks are reserved for -- cgit v1.2.3 From b8593bfda1652755136333cdd362de125b283a9c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 21 Nov 2012 01:18:23 +0000 Subject: mm: sched: Adapt the scanning rate if a NUMA hinting fault does not migrate The PTE scanning rate and fault rates are two of the biggest sources of system CPU overhead with automatic NUMA placement. Ideally a proper policy would detect if a workload was properly placed, schedule and adjust the PTE scanning rate accordingly. We do not track the necessary information to do that but we at least know if we migrated or not. This patch scans slower if a page was not migrated as the result of a NUMA hinting fault up to sysctl_numa_balancing_scan_period_max which is now higher than the previous default. Once every minute it will reset the scanner in case of phase changes. This is hilariously crude and the numbers are arbitrary. Workloads will converge quite slowly in comparison to what a proper policy should be able to do. On the plus side, we will chew up less CPU for workloads that have no need for automatic balancing. Signed-off-by: Mel Gorman --- include/linux/mm_types.h | 3 +++ include/linux/sched.h | 5 +++-- kernel/sched/core.c | 1 + kernel/sched/fair.c | 29 +++++++++++++++++++++-------- kernel/sysctl.c | 7 +++++++ mm/huge_memory.c | 2 +- mm/memory.c | 12 ++++++++---- 7 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c5fffa239861..e850a23dd6ec 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -410,6 +410,9 @@ struct mm_struct { */ unsigned long numa_next_scan; + /* numa_next_reset is when the PTE scanner period will be reset */ + unsigned long numa_next_reset; + /* Restart point for scanning and setting pte_numa */ unsigned long numa_scan_offset; diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d95a232b5b9..0f4ff2bd03f6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1562,9 +1562,9 @@ struct task_struct { #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) #ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int node, int pages); +extern void task_numa_fault(int node, int pages, bool migrated); #else -static inline void task_numa_fault(int node, int pages) +static inline void task_numa_fault(int node, int pages, bool migrated) { } #endif @@ -2009,6 +2009,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_period_reset; extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_numa_balancing_settle_count; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fbfc4843063f..9d255bc0e278 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1537,6 +1537,7 @@ static void __sched_fork(struct task_struct *p) #ifdef CONFIG_NUMA_BALANCING if (p->mm && atomic_read(&p->mm->mm_users) == 1) { p->mm->numa_next_scan = jiffies; + p->mm->numa_next_reset = jiffies; p->mm->numa_scan_seq = 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dd18087fd369..4b577863933f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -784,7 +784,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) * numa task sample period in ms */ unsigned int sysctl_numa_balancing_scan_period_min = 100; -unsigned int sysctl_numa_balancing_scan_period_max = 100*16; +unsigned int sysctl_numa_balancing_scan_period_max = 100*50; +unsigned int sysctl_numa_balancing_scan_period_reset = 100*600; /* Portion of address space to scan in MB */ unsigned int sysctl_numa_balancing_scan_size = 256; @@ -806,20 +807,19 @@ static void task_numa_placement(struct task_struct *p) /* * Got a PROT_NONE fault for a page on @node. */ -void task_numa_fault(int node, int pages) +void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; /* FIXME: Allocate task-specific structure for placement policy here */ /* - * Assume that as faults occur that pages are getting properly placed - * and fewer NUMA hints are required. Note that this is a big - * assumption, it assumes processes reach a steady steady with no - * further phase changes. + * If pages are properly placed (did not migrate) then scan slower. + * This is reset periodically in case of phase changes */ - p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max, - p->numa_scan_period + jiffies_to_msecs(2)); + if (!migrated) + p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max, + p->numa_scan_period + jiffies_to_msecs(10)); task_numa_placement(p); } @@ -857,6 +857,19 @@ void task_numa_work(struct callback_head *work) if (p->flags & PF_EXITING) return; + /* + * Reset the scan period if enough time has gone by. Objective is that + * scanning will be reduced if pages are properly placed. As tasks + * can enter different phases this needs to be re-examined. Lacking + * proper tracking of reference behaviour, this blunt hammer is used. + */ + migrate = mm->numa_next_reset; + if (time_after(now, migrate)) { + p->numa_scan_period = sysctl_numa_balancing_scan_period_min; + next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset); + xchg(&mm->numa_next_reset, next_scan); + } + /* * Enforce maximal scan/migration frequency.. */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 48a68cc258c1..8906f90d6fa2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "numa_balancing_scan_period_reset", + .data = &sysctl_numa_balancing_scan_period_reset, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "numa_balancing_scan_period_max_ms", .data = &sysctl_numa_balancing_scan_period_max, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 79b96064f8fc..199b261a257e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1068,7 +1068,7 @@ out_unlock: spin_unlock(&mm->page_table_lock); if (page) { put_page(page); - task_numa_fault(numa_node_id(), HPAGE_PMD_NR); + task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false); } return 0; } diff --git a/mm/memory.c b/mm/memory.c index 84c6d9eab182..39edb11b63dc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3468,6 +3468,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; int current_nid = -1; int target_nid; + bool migrated = false; /* * The "pte" at this point cannot be used safely without @@ -3509,12 +3510,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } /* Migrate to the requested node */ - if (migrate_misplaced_page(page, target_nid)) + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) current_nid = target_nid; out: if (current_nid != -1) - task_numa_fault(current_nid, 1); + task_numa_fault(current_nid, 1, migrated); return 0; } @@ -3554,6 +3556,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; int curr_nid = local_nid; int target_nid; + bool migrated; if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3590,9 +3593,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migrate to the requested node */ pte_unmap_unlock(pte, ptl); - if (migrate_misplaced_page(page, target_nid)) + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) curr_nid = target_nid; - task_numa_fault(curr_nid, 1); + task_numa_fault(curr_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } -- cgit v1.2.3 From 1a687c2e9a99335c9e77392f050fe607fa18a652 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 22 Nov 2012 11:16:36 +0000 Subject: mm: sched: numa: Control enabling and disabling of NUMA balancing This patch adds Kconfig options and kernel parameters to allow the enabling and disabling of automatic NUMA balancing. The existance of such a switch was and is very important when debugging problems related to transparent hugepages and we should have the same for automatic NUMA placement. Signed-off-by: Mel Gorman --- Documentation/kernel-parameters.txt | 3 +++ include/linux/sched.h | 4 ++++ init/Kconfig | 8 +++++++ kernel/sched/core.c | 48 +++++++++++++++++++++++++------------ kernel/sched/fair.c | 3 +++ kernel/sched/features.h | 6 +++-- mm/mempolicy.c | 46 +++++++++++++++++++++++++++++++++++ 7 files changed, 101 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9776f068306b..2e8d2625b814 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nr_uarts= [SERIAL] maximum number of UARTs to be registered. + numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing. + Allowed values are enable and disable + numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. one of ['zone', 'node', 'default'] can be specified This can be set from sysctl after boot. diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f4ff2bd03f6..b1e619f9ff1a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1563,10 +1563,14 @@ struct task_struct { #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int node, int pages, bool migrated); +extern void set_numabalancing_state(bool enabled); #else static inline void task_numa_fault(int node, int pages, bool migrated) { } +static inline void set_numabalancing_state(bool enabled) +{ +} #endif /* diff --git a/init/Kconfig b/init/Kconfig index 9f00f004796a..18e2a5920a34 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE depends on ARCH_WANTS_PROT_NUMA_PROT_NONE depends on NUMA_BALANCING +config NUMA_BALANCING_DEFAULT_ENABLED + bool "Automatically enable NUMA aware memory/task placement" + default y + depends on NUMA_BALANCING + help + If set, autonumic NUMA balancing will be enabled if running on a NUMA + machine. + config NUMA_BALANCING bool "Memory placement aware NUMA scheduler" default y diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9d255bc0e278..7a45015274ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { }; static void sched_feat_enable(int i) { }; #endif /* HAVE_JUMP_LABEL */ -static ssize_t -sched_feat_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int sched_feat_set(char *cmp) { - char buf[64]; - char *cmp; - int neg = 0; int i; - - if (cnt > 63) - cnt = 63; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - cmp = strstrip(buf); + int neg = 0; if (strncmp(cmp, "NO_", 3) == 0) { neg = 1; @@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf, } } + return i; +} + +static ssize_t +sched_feat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + cmp = strstrip(buf); + + i = sched_feat_set(cmp); if (i == __SCHED_FEAT_NR) return -EINVAL; @@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p) #endif /* CONFIG_NUMA_BALANCING */ } +#ifdef CONFIG_NUMA_BALANCING +void set_numabalancing_state(bool enabled) +{ + if (enabled) + sched_feat_set("NUMA"); + else + sched_feat_set("NO_NUMA"); +} +#endif /* CONFIG_NUMA_BALANCING */ + /* * fork()/clone()-time setup: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4b577863933f..7a02a2082e95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; + if (!sched_feat_numa(NUMA)) + return; + /* FIXME: Allocate task-specific structure for placement policy here */ /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 5fb7aefbec80..d2373a3e3252 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) /* - * Apply the automatic NUMA scheduling policy + * Apply the automatic NUMA scheduling policy. Enabled automatically + * at runtime if running on a NUMA machine. Can be controlled via + * numa_balancing= */ #ifdef CONFIG_NUMA_BALANCING -SCHED_FEAT(NUMA, true) +SCHED_FEAT(NUMA, false) #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fd20e28fd2ad..046308e9b999 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p) mutex_unlock(&p->mutex); } +#ifdef CONFIG_NUMA_BALANCING +static bool __initdata numabalancing_override; + +static void __init check_numabalancing_enable(void) +{ + bool numabalancing_default = false; + + if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) + numabalancing_default = true; + + if (nr_node_ids > 1 && !numabalancing_override) { + printk(KERN_INFO "Enabling automatic NUMA balancing. " + "Configure with numa_balancing= or sysctl"); + set_numabalancing_state(numabalancing_default); + } +} + +static int __init setup_numabalancing(char *str) +{ + int ret = 0; + if (!str) + goto out; + numabalancing_override = true; + + if (!strcmp(str, "enable")) { + set_numabalancing_state(true); + ret = 1; + } else if (!strcmp(str, "disable")) { + set_numabalancing_state(false); + ret = 1; + } +out: + if (!ret) + printk(KERN_WARNING "Unable to parse numa_balancing=\n"); + + return ret; +} +__setup("numa_balancing=", setup_numabalancing); +#else +static inline void __init check_numabalancing_enable(void) +{ +} +#endif /* CONFIG_NUMA_BALANCING */ + /* assumes fs == KERNEL_DS */ void __init numa_policy_init(void) { @@ -2571,6 +2615,8 @@ void __init numa_policy_init(void) if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) printk("numa_policy_init: interleaving failed\n"); + + check_numabalancing_enable(); } /* Reset policy of current process to default */ -- cgit v1.2.3 From 5bca23035391928c4c7301835accca3551b96cc2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 22 Nov 2012 14:40:03 +0000 Subject: mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node Due to the fact that migrations are driven by the CPU a task is running on there is no point tracking NUMA faults until one task runs on a new node. This patch tracks the first node used by an address space. Until it changes, PTE scanning is disabled and no NUMA hinting faults are trapped. This should help workloads that are short-lived, do not care about NUMA placement or have bound themselves to a single node. This takes advantage of the logic in "mm: sched: numa: Implement slow start for working set sampling" to delay when the checks are made. This will take advantage of processes that set their CPU and node bindings early in their lifetime. It will also potentially allow any initial load balancing to take place. Signed-off-by: Mel Gorman --- include/linux/mm_types.h | 10 ++++++++++ kernel/fork.c | 3 +++ kernel/sched/fair.c | 18 ++++++++++++++++++ kernel/sched/features.h | 4 +++- 4 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e850a23dd6ec..197422a1598c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -418,10 +418,20 @@ struct mm_struct { /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; + + /* + * The first node a task was scheduled on. If a task runs on + * a different node than Make PTE Scan Go Now. + */ + int first_nid; #endif struct uprobes_state uprobes_state; }; +/* first nid will either be a valid NID or one of these values */ +#define NUMA_PTE_SCAN_INIT -1 +#define NUMA_PTE_SCAN_ACTIVE -2 + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa2..296ea308096d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -820,6 +820,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; +#endif +#ifdef CONFIG_NUMA_BALANCING + mm->first_nid = NUMA_PTE_SCAN_INIT; #endif if (!mm_init(mm, tsk)) goto fail_nomem; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a02a2082e95..3e18f611a5aa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -860,6 +860,24 @@ void task_numa_work(struct callback_head *work) if (p->flags & PF_EXITING) return; + /* + * We do not care about task placement until a task runs on a node + * other than the first one used by the address space. This is + * largely because migrations are driven by what CPU the task + * is running on. If it's never scheduled on another node, it'll + * not migrate so why bother trapping the fault. + */ + if (mm->first_nid == NUMA_PTE_SCAN_INIT) + mm->first_nid = numa_node_id(); + if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) { + /* Are we running on a new node yet? */ + if (numa_node_id() == mm->first_nid && + !sched_feat_numa(NUMA_FORCE)) + return; + + mm->first_nid = NUMA_PTE_SCAN_ACTIVE; + } + /* * Reset the scan period if enough time has gone by. Objective is that * scanning will be reduced if pages are properly placed. As tasks diff --git a/kernel/sched/features.h b/kernel/sched/features.h index d2373a3e3252..e7c25fff1e94 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -65,8 +65,10 @@ SCHED_FEAT(LB_MIN, false) /* * Apply the automatic NUMA scheduling policy. Enabled automatically * at runtime if running on a NUMA machine. Can be controlled via - * numa_balancing= + * numa_balancing=. Allow PTE scanning to be forced on UMA machines + * for debugging the core machinery. */ #ifdef CONFIG_NUMA_BALANCING SCHED_FEAT(NUMA, false) +SCHED_FEAT(NUMA_FORCE, false) #endif -- cgit v1.2.3 From b32967ff101a7508f70be8de59b278d4df92fa00 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 19 Nov 2012 12:35:47 +0000 Subject: mm: numa: Add THP migration for the NUMA working set scanning fault case. Note: This is very heavily based on a patch from Peter Zijlstra with fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner. That patch put a lot of migration logic into mm/huge_memory.c where it does not belong. This version puts tries to share some of the migration logic with migrate_misplaced_page. However, it should be noted that now migrate.c is doing more with the pagetable manipulation than is preferred. The end result is barely recognisable so as before, the signed-offs had to be removed but will be re-added if the original authors are ok with it. Add THP migration for the NUMA working set scanning fault case. It uses the page lock to serialize. No migration pte dance is necessary because the pte is already unmapped when we decide to migrate. [dhillf@gmail.com: Fix memory leak on isolation failure] [dhillf@gmail.com: Fix transfer of last_nid information] Signed-off-by: Mel Gorman --- include/linux/migrate.h | 15 ++++ mm/huge_memory.c | 59 +++++++++---- mm/internal.h | 7 +- mm/memcontrol.c | 7 +- mm/migrate.c | 231 +++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 255 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 91556889adac..51eac4bdc606 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, extern int migrate_misplaced_page(struct page *page, int node); extern int migrate_misplaced_page(struct page *page, int node); extern bool migrate_ratelimited(int node); +extern int migrate_misplaced_transhuge_page(struct mm_struct *mm, + struct vm_area_struct *vma, + pmd_t *pmd, pmd_t entry, + unsigned long address, + struct page *page, int node); + #else static inline int migrate_misplaced_page(struct page *page, int node) { @@ -88,6 +94,15 @@ static inline bool migrate_ratelimited(int node) { return false; } + +static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, + struct vm_area_struct *vma, + pmd_t *pmd, pmd_t entry, + unsigned long address, + struct page *page, int node) +{ + return -EAGAIN; +} #endif /* CONFIG_NUMA_BALANCING */ #endif /* _LINUX_MIGRATE_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 199b261a257e..711baf84b153 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -600,7 +600,7 @@ out: } __setup("transparent_hugepage=", setup_transparent_hugepage); -static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) pmd = pmd_mkwrite(pmd); @@ -1023,10 +1023,12 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { - struct page *page = NULL; + struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; int target_nid; int current_nid = -1; + bool migrated; + bool page_locked = false; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) @@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, page = pmd_page(pmd); get_page(page); - spin_unlock(&mm->page_table_lock); current_nid = page_to_nid(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (current_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); target_nid = mpol_misplaced(page, vma, haddr); - if (target_nid == -1) + if (target_nid == -1) { + put_page(page); goto clear_pmdnuma; + } - /* - * Due to lacking code to migrate thp pages, we'll split - * (which preserves the special PROT_NONE) and re-take the - * fault on the normal pages. - */ - split_huge_page(page); - put_page(page); - - return 0; + /* Acquire the page lock to serialise THP migrations */ + spin_unlock(&mm->page_table_lock); + lock_page(page); + page_locked = true; -clear_pmdnuma: + /* Confirm the PTE did not while locked */ spin_lock(&mm->page_table_lock); - if (unlikely(!pmd_same(pmd, *pmdp))) + if (unlikely(!pmd_same(pmd, *pmdp))) { + unlock_page(page); + put_page(page); goto out_unlock; + } + spin_unlock(&mm->page_table_lock); + + /* Migrate the THP to the requested node */ + migrated = migrate_misplaced_transhuge_page(mm, vma, + pmdp, pmd, addr, + page, target_nid); + if (migrated) + current_nid = target_nid; + else { + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(pmd, *pmdp))) { + unlock_page(page); + goto out_unlock; + } + goto clear_pmdnuma; + } + + task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); + return 0; +clear_pmdnuma: pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); VM_BUG_ON(pmd_numa(*pmdp)); update_mmu_cache_pmd(vma, addr, pmdp); + if (page_locked) + unlock_page(page); out_unlock: spin_unlock(&mm->page_table_lock); - if (page) { - put_page(page); - task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false); - } + if (current_nid != -1) + task_numa_fault(current_nid, HPAGE_PMD_NR, migrated); return 0; } diff --git a/mm/internal.h b/mm/internal.h index a4fa284f6bc2..7e60ac826f2b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -212,15 +212,18 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) { if (TestClearPageMlocked(page)) { unsigned long flags; + int nr_pages = hpage_nr_pages(page); local_irq_save(flags); - __dec_zone_page_state(page, NR_MLOCK); + __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); SetPageMlocked(newpage); - __inc_zone_page_state(newpage, NR_MLOCK); + __mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages); local_irq_restore(flags); } } +extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dd39ba000b31..d97af9636ab2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3288,15 +3288,18 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup **memcgp) { struct mem_cgroup *memcg = NULL; + unsigned int nr_pages = 1; struct page_cgroup *pc; enum charge_type ctype; *memcgp = NULL; - VM_BUG_ON(PageTransHuge(page)); if (mem_cgroup_disabled()) return; + if (PageTransHuge(page)) + nr_pages <<= compound_order(page); + pc = lookup_page_cgroup(page); lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { @@ -3358,7 +3361,7 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, * charged to the res_counter since we plan on replacing the * old one and only one page is going to be left afterwards. */ - __mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false); + __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false); } /* remove redundant charge if migration failed*/ diff --git a/mm/migrate.c b/mm/migrate.c index 2a5ce135eef0..c9400960fd52 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, */ void migrate_page_copy(struct page *newpage, struct page *page) { - if (PageHuge(page)) + if (PageHuge(page) || PageTransHuge(page)) copy_huge_page(newpage, page); else copy_highpage(newpage, page); @@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node) return true; } -/* - * Attempt to migrate a misplaced page to the specified destination - * node. Caller is expected to have an elevated reference count on - * the page that will be dropped by this function before returning. - */ -int migrate_misplaced_page(struct page *page, int node) +/* Returns true if the node is migrate rate-limited after the update */ +bool numamigrate_update_ratelimit(pg_data_t *pgdat) { - pg_data_t *pgdat = NODE_DATA(node); - int isolated = 0; - LIST_HEAD(migratepages); - - /* - * Don't migrate pages that are mapped in multiple processes. - * TODO: Handle false sharing detection instead of this hammer - */ - if (page_mapcount(page) != 1) { - put_page(page); - goto out; - } + bool rate_limited = false; /* * Rate-limit the amount of data that is being migrated to a node. @@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node) pgdat->numabalancing_migrate_next_window = jiffies + msecs_to_jiffies(migrate_interval_millisecs); } - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { - spin_unlock(&pgdat->numabalancing_migrate_lock); - put_page(page); - goto out; - } - pgdat->numabalancing_migrate_nr_pages++; + if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) + rate_limited = true; + else + pgdat->numabalancing_migrate_nr_pages++; spin_unlock(&pgdat->numabalancing_migrate_lock); + + return rate_limited; +} + +int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) +{ + int ret = 0; /* Avoid migrating to a node that is nearly full */ if (migrate_balanced_pgdat(pgdat, 1)) { @@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node) if (isolate_lru_page(page)) { put_page(page); - goto out; + return 0; } - isolated = 1; + /* Page is isolated */ + ret = 1; page_lru = page_is_file_cache(page); - inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru); - list_add(&page->lru, &migratepages); + if (!PageTransHuge(page)) + inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru); + else + mod_zone_page_state(page_zone(page), + NR_ISOLATED_ANON + page_lru, + HPAGE_PMD_NR); } /* @@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node) */ put_page(page); - if (isolated) { - int nr_remaining; - - nr_remaining = migrate_pages(&migratepages, - alloc_misplaced_dst_page, - node, false, MIGRATE_ASYNC, - MR_NUMA_MISPLACED); - if (nr_remaining) { - putback_lru_pages(&migratepages); - isolated = 0; - } else - count_vm_numa_event(NUMA_PAGE_MIGRATE); + return ret; +} + +/* + * Attempt to migrate a misplaced page to the specified destination + * node. Caller is expected to have an elevated reference count on + * the page that will be dropped by this function before returning. + */ +int migrate_misplaced_page(struct page *page, int node) +{ + pg_data_t *pgdat = NODE_DATA(node); + int isolated = 0; + int nr_remaining; + LIST_HEAD(migratepages); + + /* + * Don't migrate pages that are mapped in multiple processes. + * TODO: Handle false sharing detection instead of this hammer + */ + if (page_mapcount(page) != 1) { + put_page(page); + goto out; } + + /* + * Rate-limit the amount of data that is being migrated to a node. + * Optimal placement is no good if the memory bus is saturated and + * all the time is being spent migrating! + */ + if (numamigrate_update_ratelimit(pgdat)) { + put_page(page); + goto out; + } + + isolated = numamigrate_isolate_page(pgdat, page); + if (!isolated) + goto out; + + list_add(&page->lru, &migratepages); + nr_remaining = migrate_pages(&migratepages, + alloc_misplaced_dst_page, + node, false, MIGRATE_ASYNC, + MR_NUMA_MISPLACED); + if (nr_remaining) { + putback_lru_pages(&migratepages); + isolated = 0; + } else + count_vm_numa_event(NUMA_PAGE_MIGRATE); BUG_ON(!list_empty(&migratepages)); out: return isolated; } + +int migrate_misplaced_transhuge_page(struct mm_struct *mm, + struct vm_area_struct *vma, + pmd_t *pmd, pmd_t entry, + unsigned long address, + struct page *page, int node) +{ + unsigned long haddr = address & HPAGE_PMD_MASK; + pg_data_t *pgdat = NODE_DATA(node); + int isolated = 0; + struct page *new_page = NULL; + struct mem_cgroup *memcg = NULL; + int page_lru = page_is_file_cache(page); + + /* + * Don't migrate pages that are mapped in multiple processes. + * TODO: Handle false sharing detection instead of this hammer + */ + if (page_mapcount(page) != 1) + goto out_dropref; + + /* + * Rate-limit the amount of data that is being migrated to a node. + * Optimal placement is no good if the memory bus is saturated and + * all the time is being spent migrating! + */ + if (numamigrate_update_ratelimit(pgdat)) + goto out_dropref; + + new_page = alloc_pages_node(node, + (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); + if (!new_page) + goto out_dropref; + page_xchg_last_nid(new_page, page_last_nid(page)); + + isolated = numamigrate_isolate_page(pgdat, page); + if (!isolated) { + put_page(new_page); + goto out_keep_locked; + } + + /* Prepare a page as a migration target */ + __set_page_locked(new_page); + SetPageSwapBacked(new_page); + + /* anon mapping, we can simply copy page->mapping to the new page: */ + new_page->mapping = page->mapping; + new_page->index = page->index; + migrate_page_copy(new_page, page); + WARN_ON(PageLRU(new_page)); + + /* Recheck the target PMD */ + spin_lock(&mm->page_table_lock); + if (unlikely(!pmd_same(*pmd, entry))) { + spin_unlock(&mm->page_table_lock); + + /* Reverse changes made by migrate_page_copy() */ + if (TestClearPageActive(new_page)) + SetPageActive(page); + if (TestClearPageUnevictable(new_page)) + SetPageUnevictable(page); + mlock_migrate_page(page, new_page); + + unlock_page(new_page); + put_page(new_page); /* Free it */ + + unlock_page(page); + putback_lru_page(page); + + count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); + goto out; + } + + /* + * Traditional migration needs to prepare the memcg charge + * transaction early to prevent the old page from being + * uncharged when installing migration entries. Here we can + * save the potential rollback and start the charge transfer + * only when migration is already known to end successfully. + */ + mem_cgroup_prepare_migration(page, new_page, &memcg); + + entry = mk_pmd(new_page, vma->vm_page_prot); + entry = pmd_mknonnuma(entry); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = pmd_mkhuge(entry); + + page_add_new_anon_rmap(new_page, vma, haddr); + + set_pmd_at(mm, haddr, pmd, entry); + update_mmu_cache_pmd(vma, address, entry); + page_remove_rmap(page); + /* + * Finish the charge transaction under the page table lock to + * prevent split_huge_page() from dividing up the charge + * before it's fully transferred to the new page. + */ + mem_cgroup_end_migration(memcg, page, new_page, true); + spin_unlock(&mm->page_table_lock); + + unlock_page(new_page); + unlock_page(page); + put_page(page); /* Drop the rmap reference */ + put_page(page); /* Drop the LRU isolation reference */ + + count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); + count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); + +out: + mod_zone_page_state(page_zone(page), + NR_ISOLATED_ANON + page_lru, + -HPAGE_PMD_NR); + return isolated; + +out_dropref: + put_page(page); +out_keep_locked: + return 0; +} #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA */ -- cgit v1.2.3 From 220018d388b8ab1fca1c5f0c6474bab47ad2c9c0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 5 Dec 2012 09:32:56 +0000 Subject: mm: numa: Add THP migration for the NUMA working set scanning fault case build fix Commit "Add THP migration for the NUMA working set scanning fault case" breaks the build because HPAGE_PMD_SHIFT and HPAGE_PMD_MASK defined to explode without CONFIG_TRANSPARENT_HUGEPAGE: mm/migrate.c: In function 'migrate_misplaced_transhuge_page_put': mm/migrate.c:1549: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed mm/migrate.c:1564: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed mm/migrate.c:1566: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed mm/migrate.c:1573: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed mm/migrate.c:1606: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed mm/migrate.c:1648: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed CONFIG_NUMA_BALANCING allows compilation without enabling transparent hugepages, so define the dummy function for such a configuration and only define migrate_misplaced_transhuge_page_put() when transparent hugepages are enabled. Signed-off-by: David Rientjes Signed-off-by: Mel Gorman --- include/linux/migrate.h | 16 +++++++++------- mm/migrate.c | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 51eac4bdc606..d52afb9a790c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,12 +79,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, extern int migrate_misplaced_page(struct page *page, int node); extern int migrate_misplaced_page(struct page *page, int node); extern bool migrate_ratelimited(int node); -extern int migrate_misplaced_transhuge_page(struct mm_struct *mm, - struct vm_area_struct *vma, - pmd_t *pmd, pmd_t entry, - unsigned long address, - struct page *page, int node); - #else static inline int migrate_misplaced_page(struct page *page, int node) { @@ -94,7 +88,15 @@ static inline bool migrate_ratelimited(int node) { return false; } +#endif /* CONFIG_NUMA_BALANCING */ +#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +extern int migrate_misplaced_transhuge_page(struct mm_struct *mm, + struct vm_area_struct *vma, + pmd_t *pmd, pmd_t entry, + unsigned long address, + struct page *page, int node); +#else static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd, pmd_t entry, @@ -103,6 +105,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, { return -EAGAIN; } -#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/ #endif /* _LINUX_MIGRATE_H */ diff --git a/mm/migrate.c b/mm/migrate.c index c9400960fd52..9341a501d168 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1602,7 +1602,9 @@ int migrate_misplaced_page(struct page *page, int node) out: return isolated; } +#endif /* CONFIG_NUMA_BALANCING */ +#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) int migrate_misplaced_transhuge_page(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd, pmd_t entry, -- cgit v1.2.3 From 5a505085f043e8380f83610f79642853c051e2f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Dec 2012 19:56:46 +0000 Subject: mm/rmap: Convert the struct anon_vma::mutex to an rwsem Convert the struct anon_vma::mutex to an rwsem, which will help in solving a page-migration scalability problem. (Addressed in a separate patch.) The conversion is simple and straightforward: in every case where we mutex_lock()ed we'll now down_write(). Suggested-by: Linus Torvalds Reviewed-by: Rik van Riel Cc: Peter Zijlstra Cc: Paul Turner Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/linux/rmap.h | 16 ++++++++-------- mm/huge_memory.c | 4 ++-- mm/mmap.c | 8 ++++---- mm/rmap.c | 22 +++++++++++----------- 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bfe1f4780644..f3f41d242e25 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include /* @@ -25,8 +25,8 @@ * pointing to this anon_vma once its vma list is empty. */ struct anon_vma { - struct anon_vma *root; /* Root of this anon_vma tree */ - struct mutex mutex; /* Serialize access to vma list */ + struct anon_vma *root; /* Root of this anon_vma tree */ + struct rw_semaphore rwsem; /* W: modification, R: walking the list */ /* * The refcount is taken on an anon_vma when there is no * guarantee that the vma of page tables will exist for @@ -64,7 +64,7 @@ struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ - struct rb_node rb; /* locked by anon_vma->mutex */ + struct rb_node rb; /* locked by anon_vma->rwsem */ unsigned long rb_subtree_last; #ifdef CONFIG_DEBUG_VM_RB unsigned long cached_vma_start, cached_vma_last; @@ -108,24 +108,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - mutex_lock(&anon_vma->root->mutex); + down_write(&anon_vma->root->rwsem); } static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; if (anon_vma) - mutex_unlock(&anon_vma->root->mutex); + up_write(&anon_vma->root->rwsem); } static inline void anon_vma_lock(struct anon_vma *anon_vma) { - mutex_lock(&anon_vma->root->mutex); + down_write(&anon_vma->root->rwsem); } static inline void anon_vma_unlock(struct anon_vma *anon_vma) { - mutex_unlock(&anon_vma->root->mutex); + up_write(&anon_vma->root->rwsem); } /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 711baf84b153..acd37fe55eb7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1292,7 +1292,7 @@ static int __split_huge_page_splitting(struct page *page, * We can't temporarily set the pmd to null in order * to split it, the pmd must remain marked huge at all * times or the VM won't take the pmd_trans_huge paths - * and it won't wait on the anon_vma->root->mutex to + * and it won't wait on the anon_vma->root->rwsem to * serialize against split_huge_page*. */ pmdp_splitting_flush(vma, address, pmd); @@ -1495,7 +1495,7 @@ static int __split_huge_page_map(struct page *page, return ret; } -/* must be called with anon_vma->root->mutex hold */ +/* must be called with anon_vma->root->rwsem held */ static void __split_huge_page(struct page *page, struct anon_vma *anon_vma) { diff --git a/mm/mmap.c b/mm/mmap.c index 9a796c41e7d9..88408632da66 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2561,15 +2561,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ - mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem); + down_write(&anon_vma->root->rwsem); /* * We can safely modify head.next after taking the - * anon_vma->root->mutex. If some other vma in this mm shares + * anon_vma->root->rwsem. If some other vma in this mm shares * the same anon_vma we won't take it again. * * No need of atomic instructions here, head.next * can't change from under us thanks to the - * anon_vma->root->mutex. + * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) @@ -2671,7 +2671,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * * No need of atomic instructions here, head.next * can't change from under us until we release the - * anon_vma->root->mutex. + * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) diff --git a/mm/rmap.c b/mm/rmap.c index 2ee1ef0f317b..6e3ee3b82798 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -24,7 +24,7 @@ * mm->mmap_sem * page->flags PG_locked (lock_page) * mapping->i_mmap_mutex - * anon_vma->mutex + * anon_vma->rwsem * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed, isolate_lru_page) * swap_lock (in swap_duplicate, swap_info_get) @@ -37,7 +37,7 @@ * in arch-dependent flush_dcache_mmap_lock, * within bdi.wb->list_lock in __sync_single_inode) * - * anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon) + * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon) * ->tasklist_lock * pte map lock */ @@ -103,7 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) * LOCK should suffice since the actual taking of the lock must * happen _before_ what follows. */ - if (mutex_is_locked(&anon_vma->root->mutex)) { + if (rwsem_is_locked(&anon_vma->root->rwsem)) { anon_vma_lock(anon_vma); anon_vma_unlock(anon_vma); } @@ -219,9 +219,9 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct struct anon_vma *new_root = anon_vma->root; if (new_root != root) { if (WARN_ON_ONCE(root)) - mutex_unlock(&root->mutex); + up_write(&root->rwsem); root = new_root; - mutex_lock(&root->mutex); + down_write(&root->rwsem); } return root; } @@ -229,7 +229,7 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct static inline void unlock_anon_vma_root(struct anon_vma *root) { if (root) - mutex_unlock(&root->mutex); + up_write(&root->rwsem); } /* @@ -349,7 +349,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) /* * Iterate the list once more, it now only contains empty and unlinked * anon_vmas, destroy them. Could not do before due to __put_anon_vma() - * needing to acquire the anon_vma->root->mutex. + * needing to write-acquire the anon_vma->root->rwsem. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; @@ -365,7 +365,7 @@ static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; - mutex_init(&anon_vma->mutex); + init_rwsem(&anon_vma->rwsem); atomic_set(&anon_vma->refcount, 0); anon_vma->rb_root = RB_ROOT; } @@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page) anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); root_anon_vma = ACCESS_ONCE(anon_vma->root); - if (mutex_trylock(&root_anon_vma->mutex)) { + if (down_write_trylock(&root_anon_vma->rwsem)) { /* * If the page is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will * not go away, see anon_vma_free(). */ if (!page_mapped(page)) { - mutex_unlock(&root_anon_vma->mutex); + up_write(&root_anon_vma->rwsem); anon_vma = NULL; } goto out; @@ -1299,7 +1299,7 @@ out_mlock: /* * We need mmap_sem locking, Otherwise VM_LOCKED check makes * unstable result and race. Plus, We can't wait here because - * we now hold anon_vma->mutex or mapping->i_mmap_mutex. + * we now hold anon_vma->rwsem or mapping->i_mmap_mutex. * if trylock failed, the page remain in evictable lru and later * vmscan could retry to move the page to unevictable lru if the * page is actually mlocked. -- cgit v1.2.3 From 4fc3f1d66b1ef0d7b8dc11f4ff1cc510f78b37d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 2 Dec 2012 19:56:50 +0000 Subject: mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable rmap_walk_anon() and try_to_unmap_anon() appears to be too careful about locking the anon vma: while it needs protection against anon vma list modifications, it does not need exclusive access to the list itself. Transforming this exclusive lock to a read-locked rwsem removes a global lock from the hot path of page-migration intense threaded workloads which can cause pathological performance like this: 96.43% process 0 [kernel.kallsyms] [k] perf_trace_sched_switch | --- perf_trace_sched_switch __schedule schedule schedule_preempt_disabled __mutex_lock_common.isra.6 __mutex_lock_slowpath mutex_lock | |--50.61%-- rmap_walk | move_to_new_page | migrate_pages | migrate_misplaced_page | __do_numa_page.isra.69 | handle_pte_fault | handle_mm_fault | __do_page_fault | do_page_fault | page_fault | __memset_sse2 | | | --100.00%-- worker_thread | | | --100.00%-- start_thread | --49.39%-- page_lock_anon_vma try_to_unmap_anon try_to_unmap migrate_pages migrate_misplaced_page __do_numa_page.isra.69 handle_pte_fault handle_mm_fault __do_page_fault do_page_fault page_fault __memset_sse2 | --100.00%-- worker_thread start_thread With this change applied the profile is now nicely flat and there's no anon-vma related scheduling/blocking. Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(), to make it clearer that it's an exclusive write-lock in that case - suggested by Rik van Riel. Suggested-by: Linus Torvalds Cc: Peter Zijlstra Cc: Paul Turner Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Rik van Riel Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/linux/huge_mm.h | 2 +- include/linux/rmap.h | 17 ++++++++++++++--- mm/huge_memory.c | 6 +++--- mm/ksm.c | 6 +++--- mm/memory-failure.c | 4 ++-- mm/migrate.c | 2 +- mm/mmap.c | 2 +- mm/mremap.c | 2 +- mm/rmap.c | 48 ++++++++++++++++++++++++------------------------ 9 files changed, 50 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 027ad04ef3a8..0d1208c0bdc4 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -102,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); #define wait_split_huge_page(__anon_vma, __pmd) \ do { \ pmd_t *____pmd = (__pmd); \ - anon_vma_lock(__anon_vma); \ + anon_vma_lock_write(__anon_vma); \ anon_vma_unlock(__anon_vma); \ BUG_ON(pmd_trans_splitting(*____pmd) || \ pmd_trans_huge(*____pmd)); \ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index f3f41d242e25..c20635c527a9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) up_write(&anon_vma->root->rwsem); } -static inline void anon_vma_lock(struct anon_vma *anon_vma) +static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem); } @@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma) up_write(&anon_vma->root->rwsem); } +static inline void anon_vma_lock_read(struct anon_vma *anon_vma) +{ + down_read(&anon_vma->root->rwsem); +} + +static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) +{ + up_read(&anon_vma->root->rwsem); +} + + /* * anon_vma helper functions. */ @@ -220,8 +231,8 @@ int try_to_munlock(struct page *); /* * Called by memory-failure.c to kill processes. */ -struct anon_vma *page_lock_anon_vma(struct page *page); -void page_unlock_anon_vma(struct anon_vma *anon_vma); +struct anon_vma *page_lock_anon_vma_read(struct page *page); +void page_unlock_anon_vma_read(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); /* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index acd37fe55eb7..a24c9cb9c83e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1549,7 +1549,7 @@ int split_huge_page(struct page *page) int ret = 1; BUG_ON(!PageAnon(page)); - anon_vma = page_lock_anon_vma(page); + anon_vma = page_lock_anon_vma_read(page); if (!anon_vma) goto out; ret = 0; @@ -1562,7 +1562,7 @@ int split_huge_page(struct page *page) BUG_ON(PageCompound(page)); out_unlock: - page_unlock_anon_vma(anon_vma); + page_unlock_anon_vma_read(anon_vma); out: return ret; } @@ -2074,7 +2074,7 @@ static void collapse_huge_page(struct mm_struct *mm, if (!pmd_present(*pmd) || pmd_trans_huge(*pmd)) goto out; - anon_vma_lock(vma->anon_vma); + anon_vma_lock_write(vma->anon_vma); pte = pte_offset_map(pmd, address); ptl = pte_lockptr(mm, pmd); diff --git a/mm/ksm.c b/mm/ksm.c index ae539f0b8aa1..7fa37de1ee0c 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1634,7 +1634,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1688,7 +1688,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1741,7 +1741,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ddb68a169e45..f2cd830f66c0 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, struct anon_vma *av; pgoff_t pgoff; - av = page_lock_anon_vma(page); + av = page_lock_anon_vma_read(page); if (av == NULL) /* Not actually mapped anymore */ return; @@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, } } read_unlock(&tasklist_lock); - page_unlock_anon_vma(av); + page_unlock_anon_vma_read(av); } /* diff --git a/mm/migrate.c b/mm/migrate.c index f24e9cc49cc4..6e46485f014c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -754,7 +754,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, */ if (PageAnon(page)) { /* - * Only page_lock_anon_vma() understands the subtleties of + * Only page_lock_anon_vma_read() understands the subtleties of * getting a hold on an anon_vma from outside one of its mms. */ anon_vma = page_get_anon_vma(page); diff --git a/mm/mmap.c b/mm/mmap.c index 88408632da66..68a16b40c209 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -602,7 +602,7 @@ again: remove_next = 1 + (end > next->vm_end); if (anon_vma) { VM_BUG_ON(adjust_next && next->anon_vma && anon_vma != next->anon_vma); - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_interval_tree_pre_update_vma(vma); if (adjust_next) anon_vma_interval_tree_pre_update_vma(next); diff --git a/mm/mremap.c b/mm/mremap.c index 1b61c2d3307a..3dabd170753a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -104,7 +104,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } if (vma->anon_vma) { anon_vma = vma->anon_vma; - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); } } diff --git a/mm/rmap.c b/mm/rmap.c index 6e3ee3b82798..b0f612df7b9d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -87,24 +87,24 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) VM_BUG_ON(atomic_read(&anon_vma->refcount)); /* - * Synchronize against page_lock_anon_vma() such that + * Synchronize against page_lock_anon_vma_read() such that * we can safely hold the lock without the anon_vma getting * freed. * * Relies on the full mb implied by the atomic_dec_and_test() from * put_anon_vma() against the acquire barrier implied by - * mutex_trylock() from page_lock_anon_vma(). This orders: + * down_read_trylock() from page_lock_anon_vma_read(). This orders: * - * page_lock_anon_vma() VS put_anon_vma() - * mutex_trylock() atomic_dec_and_test() + * page_lock_anon_vma_read() VS put_anon_vma() + * down_read_trylock() atomic_dec_and_test() * LOCK MB - * atomic_read() mutex_is_locked() + * atomic_read() rwsem_is_locked() * * LOCK should suffice since the actual taking of the lock must * happen _before_ what follows. */ if (rwsem_is_locked(&anon_vma->root->rwsem)) { - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_unlock(anon_vma); } @@ -146,7 +146,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, * allocate a new one. * * Anon-vma allocations are very subtle, because we may have - * optimistically looked up an anon_vma in page_lock_anon_vma() + * optimistically looked up an anon_vma in page_lock_anon_vma_read() * and that may actually touch the spinlock even in the newly * allocated vma (it depends on RCU to make sure that the * anon_vma isn't actually destroyed). @@ -181,7 +181,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) allocated = anon_vma; } - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); /* page_table_lock to protect against threads */ spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { @@ -306,7 +306,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; - anon_vma_lock(anon_vma); + anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); anon_vma_unlock(anon_vma); @@ -442,7 +442,7 @@ out: * atomic op -- the trylock. If we fail the trylock, we fall back to getting a * reference like with page_get_anon_vma() and then block on the mutex. */ -struct anon_vma *page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma_read(struct page *page) { struct anon_vma *anon_vma = NULL; struct anon_vma *root_anon_vma; @@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page) anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); root_anon_vma = ACCESS_ONCE(anon_vma->root); - if (down_write_trylock(&root_anon_vma->rwsem)) { + if (down_read_trylock(&root_anon_vma->rwsem)) { /* * If the page is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will * not go away, see anon_vma_free(). */ if (!page_mapped(page)) { - up_write(&root_anon_vma->rwsem); + up_read(&root_anon_vma->rwsem); anon_vma = NULL; } goto out; @@ -484,15 +484,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page) /* we pinned the anon_vma, its safe to sleep */ rcu_read_unlock(); - anon_vma_lock(anon_vma); + anon_vma_lock_read(anon_vma); if (atomic_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock * and bail -- can't simply use put_anon_vma() because - * we'll deadlock on the anon_vma_lock() recursion. + * we'll deadlock on the anon_vma_lock_write() recursion. */ - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); __put_anon_vma(anon_vma); anon_vma = NULL; } @@ -504,9 +504,9 @@ out: return anon_vma; } -void page_unlock_anon_vma(struct anon_vma *anon_vma) +void page_unlock_anon_vma_read(struct anon_vma *anon_vma) { - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); } /* @@ -732,7 +732,7 @@ static int page_referenced_anon(struct page *page, struct anon_vma_chain *avc; int referenced = 0; - anon_vma = page_lock_anon_vma(page); + anon_vma = page_lock_anon_vma_read(page); if (!anon_vma) return referenced; @@ -754,7 +754,7 @@ static int page_referenced_anon(struct page *page, break; } - page_unlock_anon_vma(anon_vma); + page_unlock_anon_vma_read(anon_vma); return referenced; } @@ -1474,7 +1474,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) struct anon_vma_chain *avc; int ret = SWAP_AGAIN; - anon_vma = page_lock_anon_vma(page); + anon_vma = page_lock_anon_vma_read(page); if (!anon_vma) return ret; @@ -1501,7 +1501,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) break; } - page_unlock_anon_vma(anon_vma); + page_unlock_anon_vma_read(anon_vma); return ret; } @@ -1696,7 +1696,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, int ret = SWAP_AGAIN; /* - * Note: remove_migration_ptes() cannot use page_lock_anon_vma() + * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() * because that depends on page_mapped(); but not all its usages * are holding mmap_sem. Users without mmap_sem are required to * take a reference count to prevent the anon_vma disappearing @@ -1704,7 +1704,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, anon_vma = page_anon_vma(page); if (!anon_vma) return ret; - anon_vma_lock(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); @@ -1712,7 +1712,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, if (ret != SWAP_AGAIN) break; } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); return ret; } -- cgit v1.2.3 From 1daa8a0b2000974e07eacbabecd61bccadc933b9 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 10 Dec 2012 20:41:27 +0100 Subject: of/i2c: add dummy inline functions for when CONFIG_OF_I2C(_MODULE) isn't defined If CONFIG_OF_I2C and CONFIG_OF_I2C_MODULE are undefined no declaration of of_find_i2c_device_by_node and of_find_i2c_adapter_by_node will be available. Add dummy inline functions to avoid compilation breakage. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Grant Likely --- include/linux/of_i2c.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h index 1cb775f8e663..cfb545cd86b5 100644 --- a/include/linux/of_i2c.h +++ b/include/linux/of_i2c.h @@ -29,6 +29,18 @@ static inline void of_i2c_register_devices(struct i2c_adapter *adap) { return; } + +static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) +{ + return NULL; +} + +/* must call put_device() when done with returned i2c_adapter device */ +static inline struct i2c_adapter *of_find_i2c_adapter_by_node( + struct device_node *node) +{ + return NULL; +} #endif /* CONFIG_OF_I2C */ #endif /* __LINUX_OF_I2C_H */ -- cgit v1.2.3 From 93c667ca2598bd84f1bd3f2fa176af69707699fe Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 10 Dec 2012 20:41:30 +0100 Subject: of: *node argument to of_parse_phandle_with_args should be const The "struct device_node *" argument of of_parse_phandle_with_args() can be const. Making this change makes it explicit that the function will not modify a node. Signed-off-by: Guennadi Liakhovetski [grant.likely: Resolved conflict with previous patch modifying of_parse_phandle()] Signed-off-by: Grant Likely --- drivers/of/base.c | 2 +- include/linux/of.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 538e3cfad23e..be846408dbc1 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1025,7 +1025,7 @@ EXPORT_SYMBOL(of_parse_phandle); * To get a device_node of the `node2' node you may call this: * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); */ -int of_parse_phandle_with_args(struct device_node *np, const char *list_name, +int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args) { diff --git a/include/linux/of.h b/include/linux/of.h index 60053bd7e79a..6cfea9aa401f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -273,7 +273,7 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len); extern struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index); -extern int of_parse_phandle_with_args(struct device_node *np, +extern int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args); -- cgit v1.2.3 From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:45 +0200 Subject: inotify, fanotify: replace fsnotify_put_group() with fsnotify_destroy_group() Currently in fsnotify_put_group() the ref count of a group is decremented and if it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only at group creation set to 1 and never increased after that a call to fsnotify_put_group() always results in a call to fsnotify_destroy_group(). With this patch fsnotify_destroy_group() is called directly. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 14 +++++++------- fs/notify/group.c | 2 +- fs/notify/inotify/inotify_user.c | 8 +++----- include/linux/fsnotify_backend.h | 3 ++- 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d43803669739..82ae6d783c14 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -415,7 +415,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) wake_up(&group->fanotify_data.access_waitq); #endif /* matches the fanotify_init->fsnotify_alloc_group */ - fsnotify_put_group(group); + fsnotify_destroy_group(group); return 0; } @@ -728,13 +728,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) break; default: fd = -EINVAL; - goto out_put_group; + goto out_destroy_group; } if (flags & FAN_UNLIMITED_QUEUE) { fd = -EPERM; if (!capable(CAP_SYS_ADMIN)) - goto out_put_group; + goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; @@ -743,7 +743,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & FAN_UNLIMITED_MARKS) { fd = -EPERM; if (!capable(CAP_SYS_ADMIN)) - goto out_put_group; + goto out_destroy_group; group->fanotify_data.max_marks = UINT_MAX; } else { group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; @@ -751,12 +751,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) - goto out_put_group; + goto out_destroy_group; return fd; -out_put_group: - fsnotify_put_group(group); +out_destroy_group: + fsnotify_destroy_group(group); return fd; } diff --git a/fs/notify/group.c b/fs/notify/group.c index 63fc294a4692..cfda328c3d11 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -50,7 +50,7 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group) * situtation, the fsnotify_final_destroy_group will get called when that final * mark is freed. */ -static void fsnotify_destroy_group(struct fsnotify_group *group) +void fsnotify_destroy_group(struct fsnotify_group *group) { /* clear all inode marks for this group */ fsnotify_clear_marks_by_group(group); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..dbafbfc8ceca 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -293,10 +293,8 @@ static int inotify_release(struct inode *ignored, struct file *file) pr_debug("%s: group=%p\n", __func__, group); - fsnotify_clear_marks_by_group(group); - /* free this group, matching get was inotify_init->fsnotify_obtain_group */ - fsnotify_put_group(group); + fsnotify_destroy_group(group); return 0; } @@ -712,7 +710,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > inotify_max_user_instances) { - fsnotify_put_group(group); + fsnotify_destroy_group(group); return ERR_PTR(-EMFILE); } @@ -741,7 +739,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) ret = anon_inode_getfd("inotify", &inotify_fops, group, O_RDONLY | flags); if (ret < 0) - fsnotify_put_group(group); + fsnotify_destroy_group(group); return ret; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63d966d5c2ea..d2ad345bdeec 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); - +/* destroy group */ +extern void fsnotify_destroy_group(struct fsnotify_group *group); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3 From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:46 +0200 Subject: fsnotify: introduce fsnotify_get_group() Introduce fsnotify_get_group() which increments the reference counter of a group. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/group.c | 8 ++++++++ include/linux/fsnotify_backend.h | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/notify/group.c b/fs/notify/group.c index cfda328c3d11..1d57c35f1043 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -62,6 +62,14 @@ void fsnotify_destroy_group(struct fsnotify_group *group) fsnotify_final_destroy_group(group); } +/* + * Get reference to a group. + */ +void fsnotify_get_group(struct fsnotify_group *group) +{ + atomic_inc(&group->refcnt); +} + /* * Drop a reference to a group. Free it if it's through. */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d2ad345bdeec..e76cef75295d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode /* called from fsnotify listeners, such as fanotify or dnotify */ -/* get a reference to an existing or create a new group */ +/* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); +/* get reference to a group */ +extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ -- cgit v1.2.3 From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:50 +0200 Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark list Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead of a spinlock results in more flexibility (i.e it allows to sleep while the lock is held). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/group.c | 2 +- fs/notify/inode_mark.c | 4 ++-- fs/notify/mark.c | 18 +++++++++--------- fs/notify/vfsmount_mark.c | 4 ++-- include/linux/fsnotify_backend.h | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/group.c b/fs/notify/group.c index 354044c47e23..1f7305711fc9 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -95,7 +95,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) init_waitqueue_head(&group->notification_waitq); group->max_events = UINT_MAX; - spin_lock_init(&group->mark_lock); + mutex_init(&group->mark_mutex); INIT_LIST_HEAD(&group->marks_list); group->ops = ops; diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index b13c00ac48eb..4e9071e37d5d 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -63,8 +63,8 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) { struct inode *inode = mark->i.inode; + BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&mark->group->mark_lock); spin_lock(&inode->i_lock); @@ -191,8 +191,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, mark->flags |= FSNOTIFY_MARK_FLAG_INODE; + BUG_ON(!mutex_is_locked(&group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&group->mark_lock); spin_lock(&inode->i_lock); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 32447dc06c07..ab25b810b146 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -136,13 +136,13 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) group = mark->group; spin_unlock(&mark->lock); - spin_lock(&group->mark_lock); + mutex_lock(&group->mark_mutex); spin_lock(&mark->lock); /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); goto put_group; } @@ -159,7 +159,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) list_del_init(&mark->g_list); spin_unlock(&mark->lock); - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); spin_lock(&destroy_lock); list_add(&mark->destroy_list, &destroy_list); @@ -232,11 +232,11 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, /* * LOCKING ORDER!!!! - * group->mark_lock + * group->mark_mutex * mark->lock * inode->i_lock */ - spin_lock(&group->mark_lock); + mutex_lock(&group->mark_mutex); spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; @@ -263,7 +263,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_set_mark_mask_locked(mark, mark->mask); spin_unlock(&mark->lock); - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); if (inode) __fsnotify_update_child_dentry_flags(inode); @@ -277,7 +277,7 @@ err: atomic_dec(&group->num_marks); spin_unlock(&mark->lock); - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); spin_lock(&destroy_lock); list_add(&mark->destroy_list, &destroy_list); @@ -296,7 +296,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, struct fsnotify_mark *lmark, *mark; LIST_HEAD(free_list); - spin_lock(&group->mark_lock); + mutex_lock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->flags & flags) { list_add(&mark->free_g_list, &free_list); @@ -304,7 +304,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, fsnotify_get_mark(mark); } } - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { fsnotify_destroy_mark(mark); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index b7b4b0e8554f..f26a348827f8 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -88,8 +88,8 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) { struct vfsmount *mnt = mark->m.mnt; + BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&mark->group->mark_lock); spin_lock(&mnt->mnt_root->d_lock); @@ -151,8 +151,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; + BUG_ON(!mutex_is_locked(&group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&group->mark_lock); spin_lock(&mnt->mnt_root->d_lock); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e76cef75295d..c5848346840d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -141,7 +141,7 @@ struct fsnotify_group { unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ - spinlock_t mark_lock; /* protect marks_list */ + struct mutex mark_mutex; /* protect marks_list */ atomic_t num_marks; /* 1 for each mark and 1 for not being * past the point of no return when freeing * a group */ -- cgit v1.2.3 From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:51 +0200 Subject: fsnotify: pass group to fsnotify_destroy_mark() In fsnotify_destroy_mark() dont get the group from the passed mark anymore, but pass the group itself as an additional parameter to the function. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/dnotify/dnotify.c | 4 ++-- fs/notify/fanotify/fanotify_user.c | 4 ++-- fs/notify/inode_mark.c | 10 +++++++++- fs/notify/inotify/inotify_fsnotify.c | 2 +- fs/notify/inotify/inotify_user.c | 2 +- fs/notify/mark.c | 21 ++++----------------- fs/notify/vfsmount_mark.c | 10 +++++++++- include/linux/fsnotify_backend.h | 5 +++-- kernel/audit_tree.c | 10 +++++----- kernel/audit_watch.c | 4 ++-- 10 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 3344bdd5506e..08b886f119ce 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -201,7 +201,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) /* nothing else could have found us thanks to the dnotify_mark_mutex */ if (dn_mark->dn == NULL) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, dnotify_group); mutex_unlock(&dnotify_mark_mutex); @@ -385,7 +385,7 @@ out: spin_unlock(&fsn_mark->lock); if (destroy) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, dnotify_group); mutex_unlock(&dnotify_mark_mutex); fsnotify_put_mark(fsn_mark); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 599a01952c74..1218d10424d0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -546,7 +546,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, group); fsnotify_put_mark(fsn_mark); if (removed & real_mount(mnt)->mnt_fsnotify_mask) @@ -570,7 +570,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, group); /* matches the fsnotify_find_inode_mark() */ fsnotify_put_mark(fsn_mark); if (removed & inode->i_fsnotify_mask) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 4e9071e37d5d..21230209c957 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -99,8 +99,16 @@ void fsnotify_clear_marks_by_inode(struct inode *inode) spin_unlock(&inode->i_lock); list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) { - fsnotify_destroy_mark(mark); + struct fsnotify_group *group; + + spin_lock(&mark->lock); + fsnotify_get_group(mark->group); + group = mark->group; + spin_unlock(&mark->lock); + + fsnotify_destroy_mark(mark, group); fsnotify_put_mark(mark); + fsnotify_put_group(group); } } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 74977fbf5aae..871569c7d609 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -132,7 +132,7 @@ static int inotify_handle_event(struct fsnotify_group *group, } if (inode_mark->mask & IN_ONESHOT) - fsnotify_destroy_mark(inode_mark); + fsnotify_destroy_mark(inode_mark, group); return ret; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 246250f1db7a..00ff82ff7c9f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -816,7 +816,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) ret = 0; - fsnotify_destroy_mark(&i_mark->fsn_mark); + fsnotify_destroy_mark(&i_mark->fsn_mark, group); /* match ref taken by inotify_idr_find */ fsnotify_put_mark(&i_mark->fsn_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index ab25b810b146..b77c833c8d0a 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -121,21 +121,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * The caller had better be holding a reference to this mark so we don't actually * do the final put under the mark->lock */ -void fsnotify_destroy_mark(struct fsnotify_mark *mark) +void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group) { - struct fsnotify_group *group; struct inode *inode = NULL; - spin_lock(&mark->lock); - /* dont get the group from a mark that is not alive yet */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { - spin_unlock(&mark->lock); - return; - } - fsnotify_get_group(mark->group); - group = mark->group; - spin_unlock(&mark->lock); - mutex_lock(&group->mark_mutex); spin_lock(&mark->lock); @@ -143,7 +133,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); mutex_unlock(&group->mark_mutex); - goto put_group; + return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; @@ -194,9 +184,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) */ atomic_dec(&group->num_marks); - -put_group: - fsnotify_put_group(group); } void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) @@ -307,7 +294,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, mutex_unlock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { - fsnotify_destroy_mark(mark); + fsnotify_destroy_mark(mark, group); fsnotify_put_mark(mark); } } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index f26a348827f8..4df58b8ea64a 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -46,8 +46,16 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) spin_unlock(&mnt->mnt_root->d_lock); list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) { - fsnotify_destroy_mark(mark); + struct fsnotify_group *group; + + spin_lock(&mark->lock); + fsnotify_get_group(mark->group); + group = mark->group; + spin_unlock(&mark->lock); + + fsnotify_destroy_mark(mark, group); fsnotify_put_mark(mark); + fsnotify_put_group(group); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c5848346840d..140b4b8a100b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -/* given a mark, flag it to be freed when all references are dropped */ -extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); +/* given a group and a mark, flag mark to be freed when all references are dropped */ +extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index ed206fd88cca..e81175ef25f8 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -249,7 +249,7 @@ static void untag_chunk(struct node *p) list_del_rcu(&chunk->hash); spin_unlock(&hash_lock); spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); goto out; } @@ -291,7 +291,7 @@ static void untag_chunk(struct node *p) owner->root = new; spin_unlock(&hash_lock); spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); fsnotify_put_mark(&new->mark); /* drop initial reference */ goto out; @@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); chunk->dead = 1; spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); fsnotify_put_mark(entry); return 0; } @@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); - fsnotify_destroy_mark(chunk_entry); + fsnotify_destroy_mark(chunk_entry, audit_tree_group); fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); @@ -443,7 +443,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); - fsnotify_destroy_mark(old_entry); + fsnotify_destroy_mark(old_entry, audit_tree_group); fsnotify_put_mark(chunk_entry); /* drop initial reference */ fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ return 0; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3823281401b5..a66affc1c12c 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -349,7 +349,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent) } mutex_unlock(&audit_filter_mutex); - fsnotify_destroy_mark(&parent->mark); + fsnotify_destroy_mark(&parent->mark, audit_watch_group); } /* Get path information necessary for adding watches. */ @@ -456,7 +456,7 @@ void audit_remove_watch_rule(struct audit_krule *krule) if (list_empty(&parent->watches)) { audit_get_parent(parent); - fsnotify_destroy_mark(&parent->mark); + fsnotify_destroy_mark(&parent->mark, audit_watch_group); audit_put_parent(parent); } } -- cgit v1.2.3 From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:52 +0200 Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and fsnotify_remove_mark() This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked() which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but assume that the caller has already taken the groups mark mutex. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/mark.c | 42 ++++++++++++++++++++++++++++------------ include/linux/fsnotify_backend.h | 4 ++++ 2 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/mark.c b/fs/notify/mark.c index b77c833c8d0a..f9dda0304a10 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -121,18 +121,18 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * The caller had better be holding a reference to this mark so we don't actually * do the final put under the mark->lock */ -void fsnotify_destroy_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group) +void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group) { struct inode *inode = NULL; - mutex_lock(&group->mark_mutex); + BUG_ON(!mutex_is_locked(&group->mark_mutex)); + spin_lock(&mark->lock); /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - mutex_unlock(&group->mark_mutex); return; } @@ -149,6 +149,8 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, list_del_init(&mark->g_list); spin_unlock(&mark->lock); + + /* release lock temporarily */ mutex_unlock(&group->mark_mutex); spin_lock(&destroy_lock); @@ -184,6 +186,16 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, */ atomic_dec(&group->num_marks); + + mutex_lock(&group->mark_mutex); +} + +void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group) +{ + mutex_lock(&group->mark_mutex); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); } void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) @@ -208,14 +220,15 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret = 0; BUG_ON(inode && mnt); BUG_ON(!inode && !mnt); + BUG_ON(!mutex_is_locked(&group->mark_mutex)); /* * LOCKING ORDER!!!! @@ -223,8 +236,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, * mark->lock * inode->i_lock */ - mutex_lock(&group->mark_mutex); - spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; @@ -250,8 +261,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_set_mark_mask_locked(mark, mark->mask); spin_unlock(&mark->lock); - mutex_unlock(&group->mark_mutex); - if (inode) __fsnotify_update_child_dentry_flags(inode); @@ -264,7 +273,6 @@ err: atomic_dec(&group->num_marks); spin_unlock(&mark->lock); - mutex_unlock(&group->mark_mutex); spin_lock(&destroy_lock); list_add(&mark->destroy_list, &destroy_list); @@ -274,6 +282,16 @@ err: return ret; } +int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups) +{ + int ret; + mutex_lock(&group->mark_mutex); + ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + mutex_unlock(&group->mark_mutex); + return ret; +} + /* * clear any marks in a group in which mark->flags & flags is true */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 140b4b8a100b..26c06afa264e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); +extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ -- cgit v1.2.3 From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:53 +0200 Subject: fsnotify: dont put marks on temporary list when clearing marks by group In clear_marks_by_group_flags() the mark list of a group is iterated and the marks are put on a temporary list. Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list any more and are able to remove the marks while the mark list is iterated and the mark list mutex is held. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/mark.c | 10 ++-------- include/linux/fsnotify_backend.h | 1 - 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/mark.c b/fs/notify/mark.c index f9dda0304a10..0e93d90bb753 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -299,22 +299,16 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; - LIST_HEAD(free_list); mutex_lock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->flags & flags) { - list_add(&mark->free_g_list, &free_list); - list_del_init(&mark->g_list); fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + fsnotify_put_mark(mark); } } mutex_unlock(&group->mark_mutex); - - list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { - fsnotify_destroy_mark(mark, group); - fsnotify_put_mark(mark); - } } /* diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 26c06afa264e..5a8899350456 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -287,7 +287,6 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - struct list_head free_g_list; /* tmp list used when freeing this mark */ __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 -- cgit v1.2.3 From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 12 Aug 2011 01:13:31 +0200 Subject: fsnotify: change locking order On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote: > > I finally built and tested a v3.0 kernel with these patches (I know I'm > SOOOOOO far behind). Not what I hoped for: > > > [ 150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds. Have a nice day... > > [ 150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 > > [ 150.946012] IP: [] shmem_free_inode+0x18/0x50 > > [ 150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0 > > [ 150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > [ 150.946012] CPU 0 > > [ 150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan] > > [ 150.946012] > > [ 150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM > > [ 150.946012] RIP: 0010:[] [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP: 0018:ffff88002c2e5df8 EFLAGS: 00010282 > > [ 150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438 > > [ 150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240 > > [ 150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000 > > [ 150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428 > > [ 150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610 > > [ 150.946012] FS: 00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000 > > [ 150.946012] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > [ 150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0 > > [ 150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > [ 150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > [ 150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760) > > [ 150.946012] Stack: > > [ 150.946012] ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76 > > [ 150.946012] ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250 > > [ 150.946012] ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438 > > [ 150.946012] Call Trace: > > [ 150.946012] [] shmem_evict_inode+0x76/0x130 > > [ 150.946012] [] evict+0x7e/0x170 > > [ 150.946012] [] iput_final+0xd0/0x190 > > [ 150.946012] [] iput+0x33/0x40 > > [ 150.946012] [] fsnotify_destroy_mark_locked+0x145/0x160 > > [ 150.946012] [] fsnotify_destroy_mark+0x36/0x50 > > [ 150.946012] [] sys_inotify_rm_watch+0x77/0xd0 > > [ 150.946012] [] system_call_fastpath+0x16/0x1b > > [ 150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00 > > [ 150.946012] 83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a > > [ 150.946012] RIP [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP > > [ 150.946012] CR2: 0000000000000070 > > Looks at aweful lot like the problem from: > http://www.spinics.net/lists/linux-fsdevel/msg46101.html > I tried to reproduce this bug with your test program, but without success. However, if I understand correctly, this occurs since we dont hold any locks when we call iput() in mark_destroy(), right? With the patches you tested, iput() is also not called within any lock, since the groups mark_mutex is released temporarily before iput() is called. This is, since the original codes behaviour is similar. However since we now have a mutex as the biggest lock, we can do what you suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and call iput() with the mutex held to avoid the race. The patch below implements this. It uses nested locking to avoid deadlock in case we do the final iput() on an inode which still holds marks and thus would take the mutex again when calling fsnotify_inode_delete() in destroy_inode(). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/mark.c | 20 ++++++++++---------- include/linux/fsnotify_backend.h | 7 ++++--- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 0e93d90bb753..fc6b49bf7360 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -150,6 +150,8 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, spin_unlock(&mark->lock); + if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) + iput(inode); /* release lock temporarily */ mutex_unlock(&group->mark_mutex); @@ -157,6 +159,11 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, list_add(&mark->destroy_list, &destroy_list); spin_unlock(&destroy_lock); wake_up(&destroy_waitq); + /* + * We don't necessarily have a ref on mark from caller so the above destroy + * may have actually freed it, unless this group provides a 'freeing_mark' + * function which must be holding a reference. + */ /* * Some groups like to know that marks are being freed. This is a @@ -178,22 +185,15 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, * is just a lazy update (and could be a perf win...) */ - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); - /* - * We don't necessarily have a ref on mark from caller so the above iput - * may have already destroyed it. Don't touch from now on. - */ - atomic_dec(&group->num_marks); - mutex_lock(&group->mark_mutex); + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); } void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - mutex_lock(&group->mark_mutex); + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); fsnotify_destroy_mark_locked(mark, group); mutex_unlock(&group->mark_mutex); } @@ -300,7 +300,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, { struct fsnotify_mark *lmark, *mark; - mutex_lock(&group->mark_mutex); + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->flags & flags) { fsnotify_get_mark(mark); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5a8899350456..1af2f6a722c0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -88,9 +88,10 @@ struct fsnotify_event_private_data; * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union - * freeing-mark - this means that a mark has been flagged to die when everything - * finishes using it. The function is supplied with what must be a - * valid group and inode to use to clean up. + * freeing_mark - called when a mark is being destroyed for some reason. The group + * MUST be holding a reference on each mark and that reference must be + * dropped in this function. inotify uses this function to send + * userspace messages that marks have been removed. */ struct fsnotify_ops { bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, -- cgit v1.2.3 From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 14 Oct 2011 17:43:39 -0400 Subject: fsnotify: make fasync generic for both inotify and fanotify inotify is supposed to support async signal notification when information is available on the inotify fd. This patch moves that support to generic fsnotify functions so it can be used by all notification mechanisms. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++++ fs/notify/group.c | 7 +++++++ fs/notify/inotify/inotify_user.c | 13 ++++--------- fs/notify/notification.c | 1 + include/linux/fsnotify_backend.h | 5 ++++- 5 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1218d10424d0..f0e7a57bc899 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -414,6 +414,10 @@ static int fanotify_release(struct inode *ignored, struct file *file) wake_up(&group->fanotify_data.access_waitq); #endif + + if (file->f_flags & FASYNC) + fsnotify_fasync(-1, file, 0); + /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_destroy_group(group); diff --git a/fs/notify/group.c b/fs/notify/group.c index 1f7305711fc9..bd2625bd88b4 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -102,3 +102,10 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) return group; } + +int fsnotify_fasync(int fd, struct file *file, int on) +{ + struct fsnotify_group *group = file->private_data; + + return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO; +} diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 00ff82ff7c9f..68f7bec1e664 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -280,19 +280,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, return ret; } -static int inotify_fasync(int fd, struct file *file, int on) -{ - struct fsnotify_group *group = file->private_data; - - return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO; -} - static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); + if (file->f_flags & FASYNC) + fsnotify_fasync(-1, file, 0); + /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_destroy_group(group); @@ -335,7 +331,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, static const struct file_operations inotify_fops = { .poll = inotify_poll, .read = inotify_read, - .fasync = inotify_fasync, + .fasync = fsnotify_fasync, .release = inotify_release, .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, @@ -706,7 +702,6 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); group->inotify_data.last_wd = 0; - group->inotify_data.fa = NULL; group->inotify_data.user = get_current_user(); if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > diff --git a/fs/notify/notification.c b/fs/notify/notification.c index c887b1378f7e..b3963d8c9988 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -225,6 +225,7 @@ alloc_holder: mutex_unlock(&group->notification_mutex); wake_up(&group->notification_waitq); + kill_fasync(&group->fsn_fa, SIGIO, POLL_IN); return return_event; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1af2f6a722c0..d5b0910d4961 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,8 @@ struct fsnotify_group { * a group */ struct list_head marks_list; /* all inode marks for this group */ + struct fasync_struct *fsn_fa; /* async notification */ + /* groups can define private fields here or use the void *private */ union { void *private; @@ -156,7 +158,6 @@ struct fsnotify_group { spinlock_t idr_lock; struct idr idr; u32 last_wd; - struct fasync_struct *fa; /* async notification */ struct user_struct *user; } inotify_data; #endif @@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group); extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); +/* fasync handler function */ +extern int fsnotify_fasync(int fd, struct file *file, int on); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3 From 7056741fd9fc14a65608549a4657cf5178f05f63 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Thu, 8 Nov 2012 13:47:44 -0800 Subject: lib/raid6: Add AVX2 optimized recovery functions Optimize RAID6 recovery functions to take advantage of the 256-bit YMM integer instructions introduced in AVX2. The patch was tested and benchmarked before submission. However hardware is not yet released so benchmark numbers cannot be reported. Acked-by: "H. Peter Anvin" Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- arch/x86/Makefile | 5 +- include/linux/raid/pq.h | 1 + lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 3 + lib/raid6/recov_avx2.c | 327 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/raid6/test/Makefile | 2 +- lib/raid6/x86.h | 14 ++- 7 files changed, 345 insertions(+), 9 deletions(-) create mode 100644 lib/raid6/recov_avx2.c (limited to 'include/linux') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 58790bd85c1d..95477aae9ff7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -123,9 +123,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI # does binutils support specific instructions? asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) +avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) +KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 640c69ceec96..3156347452b9 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -109,6 +109,7 @@ struct raid6_recov_calls { extern const struct raid6_recov_calls raid6_recov_intx1; extern const struct raid6_recov_calls raid6_recov_ssse3; +extern const struct raid6_recov_calls raid6_recov_avx2; /* Algorithm list */ extern const struct raid6_calls * const raid6_algos[]; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index de06dfe165b8..8c2e22bef661 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o -raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ +raid6_pq-y += algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ altivec8.o mmx.o sse1.o sse2.o hostprogs-y += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 589f5f50ad2e..8b7f55cadb45 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -72,6 +72,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov); const struct raid6_recov_calls *const raid6_recov_algos[] = { #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) +#ifdef CONFIG_AS_AVX2 + &raid6_recov_avx2, +#endif &raid6_recov_ssse3, #endif &raid6_recov_intx1, diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c new file mode 100644 index 000000000000..43a9bab91879 --- /dev/null +++ b/lib/raid6/recov_avx2.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#if CONFIG_AS_AVX2 + +#include +#include "x86.h" + +static int raid6_has_avx2(void) +{ + return boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX); +} + +static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + const u8 x0f = 0x0f; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* ymm0 = x0f[16] */ + asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0])); + asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32])); + asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0])); + asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32])); + asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0])); + asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32])); + asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0])); + asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32])); + + /* + * 1 = dq[0] ^ q[0] + * 9 = dq[32] ^ q[32] + * 0 = dp[0] ^ p[0] + * 8 = dp[32] ^ p[32] + */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); + + asm volatile("vpsraw $4, %ymm1, %ymm3"); + asm volatile("vpsraw $4, %ymm9, %ymm12"); + asm volatile("vpand %ymm7, %ymm1, %ymm1"); + asm volatile("vpand %ymm7, %ymm9, %ymm9"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm12, %ymm12"); + asm volatile("vpshufb %ymm9, %ymm4, %ymm14"); + asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm12, %ymm5, %ymm15"); + asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); + asm volatile("vpxor %ymm14, %ymm15, %ymm15"); + asm volatile("vpxor %ymm4, %ymm5, %ymm5"); + + /* + * 5 = qx[0] + * 15 = qx[32] + */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); + asm volatile("vpsraw $4, %ymm0, %ymm2"); + asm volatile("vpsraw $4, %ymm8, %ymm6"); + asm volatile("vpand %ymm7, %ymm0, %ymm3"); + asm volatile("vpand %ymm7, %ymm8, %ymm14"); + asm volatile("vpand %ymm7, %ymm2, %ymm2"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpshufb %ymm14, %ymm4, %ymm12"); + asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm13"); + asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); + asm volatile("vpxor %ymm4, %ymm1, %ymm1"); + asm volatile("vpxor %ymm12, %ymm13, %ymm13"); + + /* + * 1 = pbmul[px[0]] + * 13 = pbmul[px[32]] + */ + asm volatile("vpxor %ymm5, %ymm1, %ymm1"); + asm volatile("vpxor %ymm15, %ymm13, %ymm13"); + + /* + * 1 = db = DQ + * 13 = db[32] = DQ[32] + */ + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32])); + asm volatile("vpxor %ymm1, %ymm0, %ymm0"); + asm volatile("vpxor %ymm13, %ymm8, %ymm8"); + + asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); + asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; +#else + asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q)); + asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p)); + asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq)); + asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp)); + + /* 1 = dq ^ q; 0 = dp ^ p */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16])); + + /* + * 1 = dq ^ q + * 3 = dq ^ p >> 4 + */ + asm volatile("vpsraw $4, %ymm1, %ymm3"); + asm volatile("vpand %ymm7, %ymm1, %ymm1"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpshufb %ymm1, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm3, %ymm5, %ymm5"); + asm volatile("vpxor %ymm4, %ymm5, %ymm5"); + + /* 5 = qx */ + + asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16])); + + asm volatile("vpsraw $4, %ymm0, %ymm2"); + asm volatile("vpand %ymm7, %ymm0, %ymm3"); + asm volatile("vpand %ymm7, %ymm2, %ymm2"); + asm volatile("vpshufb %ymm3, %ymm4, %ymm4"); + asm volatile("vpshufb %ymm2, %ymm1, %ymm1"); + asm volatile("vpxor %ymm4, %ymm1, %ymm1"); + + /* 1 = pbmul[px] */ + asm volatile("vpxor %ymm5, %ymm1, %ymm1"); + /* 1 = db = DQ */ + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + + asm volatile("vpxor %ymm1, %ymm0, %ymm0"); + asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0])); + + bytes -= 32; + p += 32; + q += 32; + dp += 32; + dq += 32; +#endif + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + const u8 x0f = 0x0f; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f)); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); + asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32])); + asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); + asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32])); + + /* + * 3 = q[0] ^ dq[0] + * 8 = q[32] ^ dq[32] + */ + asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); + asm volatile("vmovapd %ymm0, %ymm13"); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); + asm volatile("vmovapd %ymm1, %ymm14"); + + asm volatile("vpsraw $4, %ymm3, %ymm6"); + asm volatile("vpsraw $4, %ymm8, %ymm12"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm8, %ymm8"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpand %ymm7, %ymm12, %ymm12"); + asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); + asm volatile("vpshufb %ymm8, %ymm13, %ymm13"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); + asm volatile("vpshufb %ymm12, %ymm14, %ymm14"); + asm volatile("vpxor %ymm0, %ymm1, %ymm1"); + asm volatile("vpxor %ymm13, %ymm14, %ymm14"); + + /* + * 1 = qmul[q[0] ^ dq[0]] + * 14 = qmul[q[32] ^ dq[32]] + */ + asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); + asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32])); + asm volatile("vpxor %ymm1, %ymm2, %ymm2"); + asm volatile("vpxor %ymm14, %ymm12, %ymm12"); + + /* + * 2 = p[0] ^ qmul[q[0] ^ dq[0]] + * 12 = p[32] ^ qmul[q[32] ^ dq[32]] + */ + + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32])); + asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); + asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; +#else + asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0])); + asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0])); + + /* 3 = q ^ dq */ + + asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0])); + asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16])); + + asm volatile("vpsraw $4, %ymm3, %ymm6"); + asm volatile("vpand %ymm7, %ymm3, %ymm3"); + asm volatile("vpand %ymm7, %ymm6, %ymm6"); + asm volatile("vpshufb %ymm3, %ymm0, %ymm0"); + asm volatile("vpshufb %ymm6, %ymm1, %ymm1"); + asm volatile("vpxor %ymm0, %ymm1, %ymm1"); + + /* 1 = qmul[q ^ dq] */ + + asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0])); + asm volatile("vpxor %ymm1, %ymm2, %ymm2"); + + /* 2 = p ^ qmul[q ^ dq] */ + + asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0])); + asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0])); + + bytes -= 32; + p += 32; + q += 32; + dq += 32; +#endif + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_avx2 = { + .data2 = raid6_2data_recov_avx2, + .datap = raid6_datap_recov_avx2, + .valid = raid6_has_avx2, +#ifdef CONFIG_X86_64 + .name = "avx2x2", +#else + .name = "avx2x1", +#endif + .priority = 2, +}; + +#else +#warning "your version of binutils lacks AVX2 support" +#endif + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index c76151d94764..d919c98ce266 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -23,7 +23,7 @@ RANLIB = ranlib all: raid6.a raid6test raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ - altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \ tables.o rm -f $@ $(AR) cq $@ $^ diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index d55d63232c55..b7595484a815 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void) #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ +#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ /* Should work well enough on modern CPUs for testing */ static inline int boot_cpu_has(int flag) { - u32 eax = (flag & 0x20) ? 0x80000001 : 1; - u32 ecx, edx; + u32 eax, ebx, ecx, edx; + + eax = (flag & 0x100) ? 7 : + (flag & 0x20) ? 0x80000001 : 1; + ecx = 0; asm volatile("cpuid" - : "+a" (eax), "=d" (edx), "=c" (ecx) - : : "ebx"); + : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); - return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; + return ((flag & 0x100 ? ebx : + (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1; } #endif /* ndef __KERNEL__ */ -- cgit v1.2.3 From 2c935842bdb46f5f557426feb4d2bdfdad1aa5f9 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 30 Nov 2012 13:10:39 -0800 Subject: lib/raid6: Add AVX2 optimized gen_syndrome functions Add AVX2 optimized gen_syndrom functions, which is simply based on sse2.c written by hpa. Signed-off-by: Yuanhan Liu Reviewed-by: H. Peter Anvin Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 3 + lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 9 ++ lib/raid6/avx2.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/raid6/test/Makefile | 12 ++- 5 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 lib/raid6/avx2.c (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 3156347452b9..8dfaa2ce2e95 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1; extern const struct raid6_calls raid6_altivec2; extern const struct raid6_calls raid6_altivec4; extern const struct raid6_calls raid6_altivec8; +extern const struct raid6_calls raid6_avx2x1; +extern const struct raid6_calls raid6_avx2x2; +extern const struct raid6_calls raid6_avx2x4; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8c2e22bef661..3430711b9bdf 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o raid6_pq-y += algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ - altivec8.o mmx.o sse1.o sse2.o + altivec8.o mmx.o sse1.o sse2.o avx2.o hostprogs-y += mktables quiet_cmd_unroll = UNROLL $@ diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 8b7f55cadb45..6d7316fe9f30 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_sse1x2, &raid6_sse2x1, &raid6_sse2x2, +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x1, + &raid6_avx2x2, +#endif #endif #if defined(__x86_64__) && !defined(__arch_um__) &raid6_sse2x1, &raid6_sse2x2, &raid6_sse2x4, +#ifdef CONFIG_AS_AVX2 + &raid6_avx2x1, + &raid6_avx2x2, + &raid6_avx2x4, +#endif #endif #ifdef CONFIG_ALTIVEC &raid6_altivec1, diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c new file mode 100644 index 000000000000..bc3b1dd436eb --- /dev/null +++ b/lib/raid6/avx2.c @@ -0,0 +1,251 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 2012 Intel Corporation + * Author: Yuanhan Liu + * + * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * AVX2 implementation of RAID-6 syndrome functions + * + */ + +#ifdef CONFIG_AS_AVX2 + +#include +#include "x86.h" + +static const struct raid6_avx2_constants { + u64 x1d[4]; +} raid6_avx2_constants __aligned(32) = { + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, + 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, +}; + +static int raid6_have_avx2(void) +{ + return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX); +} + +/* + * Plain AVX2 implementation + */ +static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ + + for (d = 0; d < bytes; d += 32) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ + asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); + for (z = z0-2; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm6,%ymm2,%ymm2"); + asm volatile("vpxor %ymm6,%ymm4,%ymm4"); + asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); + } + asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm6,%ymm2,%ymm2"); + asm volatile("vpxor %ymm6,%ymm4,%ymm4"); + + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x1 = { + raid6_avx21_gen_syndrome, + raid6_have_avx2, + "avx2x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 AVX2 implementation + */ +static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 32 bytes */ + for (d = 0; d < bytes; d += 64) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); + asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ + asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ + asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ + asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ + for (z = z0-1; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); + asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); + asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpand %ymm0,%ymm7,%ymm7"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); + asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); + asm volatile("vpxor %ymm5,%ymm2,%ymm2"); + asm volatile("vpxor %ymm7,%ymm3,%ymm3"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + } + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x2 = { + raid6_avx22_gen_syndrome, + raid6_have_avx2, + "avx2x2", + 1 /* Has cache hints */ +}; + +#ifdef CONFIG_X86_64 + +/* + * Unrolled-by-4 AVX2 implementation + */ +static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); + asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ + asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ + asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ + asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ + asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ + asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ + + for (d = 0; d < bytes; d += 128) { + for (z = z0; z >= 0; z--) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); + asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); + asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); + asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); + asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); + asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); + asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); + asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); + asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); + asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); + asm volatile("vpand %ymm0,%ymm5,%ymm5"); + asm volatile("vpand %ymm0,%ymm7,%ymm7"); + asm volatile("vpand %ymm0,%ymm13,%ymm13"); + asm volatile("vpand %ymm0,%ymm15,%ymm15"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vpxor %ymm13,%ymm12,%ymm12"); + asm volatile("vpxor %ymm15,%ymm14,%ymm14"); + asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); + asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); + asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); + asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); + asm volatile("vpxor %ymm5,%ymm2,%ymm2"); + asm volatile("vpxor %ymm7,%ymm3,%ymm3"); + asm volatile("vpxor %ymm13,%ymm10,%ymm10"); + asm volatile("vpxor %ymm15,%ymm11,%ymm11"); + asm volatile("vpxor %ymm5,%ymm4,%ymm4"); + asm volatile("vpxor %ymm7,%ymm6,%ymm6"); + asm volatile("vpxor %ymm13,%ymm12,%ymm12"); + asm volatile("vpxor %ymm15,%ymm14,%ymm14"); + } + asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); + asm volatile("vpxor %ymm2,%ymm2,%ymm2"); + asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); + asm volatile("vpxor %ymm3,%ymm3,%ymm3"); + asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); + asm volatile("vpxor %ymm10,%ymm10,%ymm10"); + asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); + asm volatile("vpxor %ymm11,%ymm11,%ymm11"); + asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); + asm volatile("vpxor %ymm4,%ymm4,%ymm4"); + asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); + asm volatile("vpxor %ymm6,%ymm6,%ymm6"); + asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); + asm volatile("vpxor %ymm12,%ymm12,%ymm12"); + asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); + asm volatile("vpxor %ymm14,%ymm14,%ymm14"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_avx2x4 = { + raid6_avx24_gen_syndrome, + raid6_have_avx2, + "avx2x4", + 1 /* Has cache hints */ +}; +#endif + +#endif /* CONFIG_AS_AVX2 */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index d919c98ce266..754cbac0f9f8 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -11,6 +11,16 @@ AWK = awk -f AR = ar RANLIB = ranlib +ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/) +ifeq ($(ARCH),i386) + CFLAGS += -DCONFIG_X86_32 +endif +ifeq ($(ARCH),x86_64) + CFLAGS += -DCONFIG_X86_64 +endif +CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1"| gcc -c -x assembler - &&\ + rm ./-.o && echo -DCONFIG_AS_AVX2=1) + .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -22,7 +32,7 @@ RANLIB = ranlib all: raid6.a raid6test -raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ +raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o avx2.o \ altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \ tables.o rm -f $@ -- cgit v1.2.3 From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Nov 2012 12:28:24 -0800 Subject: libceph: remove 'osdtimeout' option This would reset a connection with any OSD that had an outstanding request that was taking more than N seconds. The idea was that if the OSD was buggy, the client could compensate by resending the request. In reality, this only served to hide server bugs, and we haven't actually seen such a bug in quite a while. Moreover, the userspace client code never did this. More importantly, often the request is taking a long time because the OSD is trying to recover, or overloaded, and killing the connection and retrying would only make the situation worse by giving the OSD more work to do. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/super.c | 2 -- include/linux/ceph/libceph.h | 2 -- net/ceph/ceph_common.c | 3 +-- net/ceph/osd_client.c | 47 ++++---------------------------------------- 4 files changed, 5 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2f586b0e5e0f..fcda1c73a1e5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); - if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) - seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, ",osdkeepalivetimeout=%d", opt->osd_keepalive_timeout); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 42624789b06f..317aff8feb0a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,7 +43,6 @@ struct ceph_options { struct ceph_entity_addr my_addr; int mount_timeout; int osd_idle_ttl; - int osd_timeout; int osd_keepalive_timeout; /* @@ -63,7 +62,6 @@ struct ceph_options { * defaults */ #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a8020293f342..ee71ea26777a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; - opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ @@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, /* misc */ case Opt_osdtimeout: - opt->osd_timeout = intval; + pr_warning("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: opt->osd_keepalive_timeout = intval; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccbdfbba9e53..7ebfe13267e6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, } } -static void kick_osd_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) -{ - mutex_lock(&osdc->request_mutex); - __kick_osd_requests(osdc, kickosd); - mutex_unlock(&osdc->request_mutex); -} - /* * If the osd connection drops, we need to resubmit all requests. */ @@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_osd_requests(osdc, osd); + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, osd); + mutex_unlock(&osdc->request_mutex); send_queued(osdc); up_read(&osdc->map_sem); } @@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); - struct ceph_osd_request *req, *last_req = NULL; + struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; - unsigned long last_stamp = 0; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1105,37 +1097,6 @@ static void handle_timeout(struct work_struct *work) mutex_lock(&osdc->request_mutex); - /* - * reset osds that appear to be _really_ unresponsive. this - * is a failsafe measure.. we really shouldn't be getting to - * this point if the system is working properly. the monitors - * should mark the osd as failed and we should find out about - * it from an updated osd map. - */ - while (timeout && !list_empty(&osdc->req_lru)) { - req = list_entry(osdc->req_lru.next, struct ceph_osd_request, - r_req_lru_item); - - /* hasn't been long enough since we sent it? */ - if (time_before(jiffies, req->r_stamp + timeout)) - break; - - /* hasn't been long enough since it was acked? */ - if (req->r_request->ack_stamp == 0 || - time_before(jiffies, req->r_request->ack_stamp + timeout)) - break; - - BUG_ON(req == last_req && req->r_stamp == last_stamp); - last_req = req; - last_stamp = req->r_stamp; - - osd = req->r_osd; - BUG_ON(!osd); - pr_warning(" tid %llu timed out on osd%d, will reset osd\n", - req->r_tid, osd->o_osd); - __kick_osd_requests(osdc, osd); - } - /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen -- cgit v1.2.3 From d2cc4dde9206aa2c7fb237aa689d3277cc070547 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 29 Nov 2012 08:37:03 -0600 Subject: bdi_register: add __printf verification, fix arg mismatch __printf is useful to verify format and arguments. Signed-off-by: Joe Perches Reviewed-by: Alex Elder --- fs/ceph/super.c | 2 +- include/linux/backing-dev.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fcda1c73a1e5..1a144001b2e1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -842,7 +842,7 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = default_backing_dev_info.ra_pages; - err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (!err) sb->s_bdi = &fsc->backing_dev_info; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2a9a9abc9126..12731a19ef06 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -114,6 +114,7 @@ struct backing_dev_info { int bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); +__printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); -- cgit v1.2.3 From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:31:07 +1030 Subject: module: add syscall to load module from fd As part of the effort to create a stronger boundary between root and kernel, Chrome OS wants to be able to enforce that kernel modules are being loaded only from our read-only crypto-hash verified (dm_verity) root filesystem. Since the init_module syscall hands the kernel a module as a memory blob, no reasoning about the origin of the blob can be made. Earlier proposals for appending signatures to kernel modules would not be useful in Chrome OS, since it would involve adding an additional set of keys to our kernel and builds for no good reason: we already trust the contents of our root filesystem. We don't need to verify those kernel modules a second time. Having to do signature checking on module loading would slow us down and be redundant. All we need to know is where a module is coming from so we can say yes/no to loading it. If a file descriptor is used as the source of a kernel module, many more things can be reasoned about. In Chrome OS's case, we could enforce that the module lives on the filesystem we expect it to live on. In the case of IMA (or other LSMs), it would be possible, for example, to examine extended attributes that may contain signatures over the contents of the module. This introduces a new syscall (on x86), similar to init_module, that has only two arguments. The first argument is used as a file descriptor to the module and the second argument is a pointer to the NULL terminated string of module arguments. Signed-off-by: Kees Cook Cc: Andrew Morton Signed-off-by: Rusty Russell (merge fixes) --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + kernel/module.c | 367 +++++++++++++++++++++++---------------- kernel/sys_ni.c | 1 + 5 files changed, 223 insertions(+), 148 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index a47103fbc692..83b3838417ed 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -356,3 +356,4 @@ 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp +350 i386 finit_module sys_finit_module diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index a582bfed95bb..7c58c84b7bc8 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -319,6 +319,7 @@ 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp +313 common finit_module sys_finit_module # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 727f0cd73921..32bc035bcd68 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); +asmlinkage long sys_finit_module(int fd, const char __user *uargs); #endif diff --git a/kernel/module.c b/kernel/module.c index 6e48c3a43599..6d2c4e4ca1f5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod, #endif #ifdef CONFIG_MODULE_SIG -static int module_sig_check(struct load_info *info, - const void *mod, unsigned long *_len) +static int module_sig_check(struct load_info *info) { int err = -ENOKEY; - unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; - unsigned long len = *_len; + const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; + const void *mod = info->hdr; - if (len > markerlen && - memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { + if (info->len > markerlen && + memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { /* We truncate the module to discard the signature */ - *_len -= markerlen; - err = mod_verify_sig(mod, _len); + info->len -= markerlen; + err = mod_verify_sig(mod, &info->len); } if (!err) { @@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info, return err; } #else /* !CONFIG_MODULE_SIG */ -static int module_sig_check(struct load_info *info, - void *mod, unsigned long *len) +static int module_sig_check(struct load_info *info) { return 0; } #endif /* !CONFIG_MODULE_SIG */ -/* Sets info->hdr, info->len and info->sig_ok. */ -static int copy_and_check(struct load_info *info, - const void __user *umod, unsigned long len, - const char __user *uargs) +/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ +static int elf_header_check(struct load_info *info) { - int err; - Elf_Ehdr *hdr; + if (info->len < sizeof(*(info->hdr))) + return -ENOEXEC; + + if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 + || info->hdr->e_type != ET_REL + || !elf_check_arch(info->hdr) + || info->hdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (info->hdr->e_shoff >= info->len + || (info->hdr->e_shnum * sizeof(Elf_Shdr) > + info->len - info->hdr->e_shoff)) + return -ENOEXEC; - if (len < sizeof(*hdr)) + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_user(const void __user *umod, unsigned long len, + struct load_info *info) +{ + info->len = len; + if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; /* Suck in entire file: we'll want most of it. */ - if ((hdr = vmalloc(len)) == NULL) + info->hdr = vmalloc(info->len); + if (!info->hdr) return -ENOMEM; - if (copy_from_user(hdr, umod, len) != 0) { - err = -EFAULT; - goto free_hdr; + if (copy_from_user(info->hdr, umod, info->len) != 0) { + vfree(info->hdr); + return -EFAULT; } - err = module_sig_check(info, hdr, &len); + return 0; +} + +/* Sets info->hdr and info->len. */ +static int copy_module_from_fd(int fd, struct load_info *info) +{ + struct file *file; + int err; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + file = fget(fd); + if (!file) + return -ENOEXEC; + + err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); if (err) - goto free_hdr; + goto out; - /* Sanity checks against insmoding binaries or wrong arch, - weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 - || hdr->e_type != ET_REL - || !elf_check_arch(hdr) - || hdr->e_shentsize != sizeof(Elf_Shdr)) { - err = -ENOEXEC; - goto free_hdr; + if (stat.size > INT_MAX) { + err = -EFBIG; + goto out; } - - if (hdr->e_shoff >= len || - hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) { - err = -ENOEXEC; - goto free_hdr; + info->hdr = vmalloc(stat.size); + if (!info->hdr) { + err = -ENOMEM; + goto out; } - info->hdr = hdr; - info->len = len; - return 0; + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(file, pos, (char *)(info->hdr) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(info->hdr); + err = bytes; + goto out; + } + if (bytes == 0) + break; + pos += bytes; + } + info->len = pos; -free_hdr: - vfree(hdr); +out: + fput(file); return err; } @@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name) return ret; } +/* Call module constructors. */ +static void do_mod_ctors(struct module *mod) +{ +#ifdef CONFIG_CONSTRUCTORS + unsigned long i; + + for (i = 0; i < mod->num_ctors; i++) + mod->ctors[i](); +#endif +} + +/* This is where the real work happens */ +static int do_init_module(struct module *mod) +{ + int ret = 0; + + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_COMING, mod); + + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + + do_mod_ctors(mod); + /* Start the module */ + if (mod->init != NULL) + ret = do_one_initcall(mod->init); + if (ret < 0) { + /* Init routine failed: abort. Try to protect us from + buggy refcounters. */ + mod->state = MODULE_STATE_GOING; + synchronize_sched(); + module_put(mod); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + free_module(mod); + wake_up_all(&module_wq); + return ret; + } + if (ret > 0) { + printk(KERN_WARNING +"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" +"%s: loading module anyway...\n", + __func__, mod->name, ret, + __func__); + dump_stack(); + } + + /* Now it's a first class citizen! */ + mod->state = MODULE_STATE_LIVE; + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); + + /* We need to finish all async code before the module init sequence is done */ + async_synchronize_full(); + + mutex_lock(&module_mutex); + /* Drop initial reference. */ + module_put(mod); + trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif + unset_module_init_ro_nx(mod); + module_free(mod, mod->module_init); + mod->module_init = NULL; + mod->init_size = 0; + mod->init_ro_size = 0; + mod->init_text_size = 0; + mutex_unlock(&module_mutex); + wake_up_all(&module_wq); + + return 0; +} + +static int may_init_module(void) +{ + if (!capable(CAP_SYS_MODULE) || modules_disabled) + return -EPERM; + + return 0; +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, - unsigned long len, - const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs) { - struct load_info info = { NULL, }; struct module *mod, *old; long err; - pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", - umod, len, uargs); + err = module_sig_check(info); + if (err) + goto free_copy; - /* Copy in the blobs from userspace, check they are vaguely sane. */ - err = copy_and_check(&info, umod, len, uargs); + err = elf_header_check(info); if (err) - return ERR_PTR(err); + goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(&info); + mod = layout_and_allocate(info); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; } #ifdef CONFIG_MODULE_SIG - mod->sig_ok = info.sig_ok; + mod->sig_ok = info->sig_ok; if (!mod->sig_ok) add_taint_module(mod, TAINT_FORCED_MODULE); #endif @@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod, /* Now we've got everything in the final locations, we can * find optional sections. */ - find_module_sections(mod, &info); + find_module_sections(mod, info); err = check_module_license_and_versions(mod); if (err) goto free_unload; /* Set up MODINFO_ATTR fields */ - setup_modinfo(mod, &info); + setup_modinfo(mod, info); /* Fix up syms, so that st_value is a pointer to location. */ - err = simplify_symbols(mod, &info); + err = simplify_symbols(mod, info); if (err < 0) goto free_modinfo; - err = apply_relocations(mod, &info); + err = apply_relocations(mod, info); if (err < 0) goto free_modinfo; - err = post_relocation(mod, &info); + err = post_relocation(mod, info); if (err < 0) goto free_modinfo; @@ -3041,14 +3169,14 @@ again: } /* This has to be done once we're sure module name is unique. */ - dynamic_debug_setup(info.debug, info.num_debug); + dynamic_debug_setup(info->debug, info->num_debug); /* Find duplicate symbols */ err = verify_export_symbols(mod); if (err < 0) goto ddebug; - module_bug_finalize(info.hdr, info.sechdrs, mod); + module_bug_finalize(info->hdr, info->sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -3059,16 +3187,17 @@ again: goto unlink; /* Link in to syfs. */ - err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); + err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); if (err < 0) goto unlink; /* Get rid of temporary copy. */ - free_copy(&info); + free_copy(info); /* Done! */ trace_module_load(mod); - return mod; + + return do_init_module(mod); unlink: mutex_lock(&module_mutex); @@ -3077,7 +3206,7 @@ again: module_bug_cleanup(mod); wake_up_all(&module_wq); ddebug: - dynamic_debug_remove(info.debug); + dynamic_debug_remove(info->debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); @@ -3089,106 +3218,48 @@ again: free_unload: module_unload_free(mod); free_module: - module_deallocate(mod, &info); + module_deallocate(mod, info); free_copy: - free_copy(&info); - return ERR_PTR(err); -} - -/* Call module constructors. */ -static void do_mod_ctors(struct module *mod) -{ -#ifdef CONFIG_CONSTRUCTORS - unsigned long i; - - for (i = 0; i < mod->num_ctors; i++) - mod->ctors[i](); -#endif + free_copy(info); + return err; } -/* This is where the real work happens */ SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs) { - struct module *mod; - int ret = 0; - - /* Must have permission */ - if (!capable(CAP_SYS_MODULE) || modules_disabled) - return -EPERM; + int err; + struct load_info info = { }; - /* Do all the hard work */ - mod = load_module(umod, len, uargs); - if (IS_ERR(mod)) - return PTR_ERR(mod); + err = may_init_module(); + if (err) + return err; - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_COMING, mod); + pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", + umod, len, uargs); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); + err = copy_module_from_user(umod, len, &info); + if (err) + return err; - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); + return load_module(&info, uargs); +} - do_mod_ctors(mod); - /* Start the module */ - if (mod->init != NULL) - ret = do_one_initcall(mod->init); - if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ - mod->state = MODULE_STATE_GOING; - synchronize_sched(); - module_put(mod); - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_GOING, mod); - free_module(mod); - wake_up_all(&module_wq); - return ret; - } - if (ret > 0) { - printk(KERN_WARNING -"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" -"%s: loading module anyway...\n", - __func__, mod->name, ret, - __func__); - dump_stack(); - } +SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +{ + int err; + struct load_info info = { }; - /* Now it's a first class citizen! */ - mod->state = MODULE_STATE_LIVE; - blocking_notifier_call_chain(&module_notify_list, - MODULE_STATE_LIVE, mod); + err = may_init_module(); + if (err) + return err; - /* We need to finish all async code before the module init sequence is done */ - async_synchronize_full(); + pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); - mutex_lock(&module_mutex); - /* Drop initial reference. */ - module_put(mod); - trim_init_extable(mod); -#ifdef CONFIG_KALLSYMS - mod->num_symtab = mod->core_num_syms; - mod->symtab = mod->core_symtab; - mod->strtab = mod->core_strtab; -#endif - unset_module_init_ro_nx(mod); - module_free(mod, mod->module_init); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_ro_size = 0; - mod->init_text_size = 0; - mutex_unlock(&module_mutex); - wake_up_all(&module_wq); + err = copy_module_from_fd(fd, &info); + if (err) + return err; - return 0; + return load_module(&info, uargs); } static inline int within(unsigned long addr, void *start, unsigned long size) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index dbff751e4086..395084d4ce16 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); cond_syscall(sys_init_module); +cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); cond_syscall(sys_socketpair); cond_syscall(sys_bind); -- cgit v1.2.3 From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Oct 2012 18:09:41 +1030 Subject: module: add flags arg to sys_finit_module() Thanks to Michael Kerrisk for keeping us honest. These flags are actually useful for eliminating the only case where kmod has to mangle a module's internals: for overriding module versioning. Signed-off-by: Rusty Russell Acked-by: Lucas De Marchi Acked-by: Kees Cook --- include/linux/syscalls.h | 2 +- include/uapi/linux/module.h | 8 ++++++++ kernel/module.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 include/uapi/linux/module.h (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 32bc035bcd68..8cf7b508cb50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); -asmlinkage long sys_finit_module(int fd, const char __user *uargs); +asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); #endif diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h new file mode 100644 index 000000000000..38da4258b12f --- /dev/null +++ b/include/uapi/linux/module.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MODULE_H +#define _UAPI_LINUX_MODULE_H + +/* Flags for sys_finit_module: */ +#define MODULE_INIT_IGNORE_MODVERSIONS 1 +#define MODULE_INIT_IGNORE_VERMAGIC 2 + +#endif /* _UAPI_LINUX_MODULE_H */ diff --git a/kernel/module.c b/kernel/module.c index 6d2c4e4ca1f5..1395ca382fb5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "module-internal.h" #define CREATE_TRACE_POINTS @@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info) vfree(info->hdr); } -static int rewrite_section_headers(struct load_info *info) +static int rewrite_section_headers(struct load_info *info, int flags) { unsigned int i; @@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info) } /* Track but don't keep modinfo and version sections. */ - info->index.vers = find_sec(info, "__versions"); + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); info->index.info = find_sec(info, ".modinfo"); info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info) * Return the temporary module pointer (we'll replace it with the final * one when we move the module sections around). */ -static struct module *setup_load_info(struct load_info *info) +static struct module *setup_load_info(struct load_info *info, int flags) { unsigned int i; int err; @@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info) info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - err = rewrite_section_headers(info); + err = rewrite_section_headers(info, flags); if (err) return ERR_PTR(err); @@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info) return mod; } -static int check_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); int err; + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; + /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { err = try_to_force_load(mod, "bad vermagic"); @@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } -static struct module *layout_and_allocate(struct load_info *info) +static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. */ struct module *mod; Elf_Shdr *pcpusec; int err; - mod = setup_load_info(info); + mod = setup_load_info(info, flags); if (IS_ERR(mod)) return mod; - err = check_modinfo(mod, info); + err = check_modinfo(mod, info, flags); if (err) return ERR_PTR(err); @@ -3078,7 +3085,8 @@ static int may_init_module(void) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static int load_module(struct load_info *info, const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs, + int flags) { struct module *mod, *old; long err; @@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs) goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(info); + mod = layout_and_allocate(info, flags); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; @@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, 0); } -SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { int err; struct load_info info = { }; @@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) if (err) return err; - pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC)) + return -EINVAL; err = copy_module_from_fd(fd, &info); if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, flags); } static inline int within(unsigned long addr, void *start, unsigned long size) -- cgit v1.2.3 From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 16 Oct 2012 07:32:07 +1030 Subject: security: introduce kernel_module_from_file hook Now that kernel module origins can be reasoned about, provide a hook to the LSMs to make policy decisions about the module file. This will let Chrome OS enforce that loadable kernel modules can only come from its read-only hash-verified root filesystem. Other LSMs can, for example, read extended attributes for signatures, etc. Signed-off-by: Kees Cook Acked-by: Serge E. Hallyn Acked-by: Eric Paris Acked-by: Mimi Zohar Acked-by: James Morris Signed-off-by: Rusty Russell --- include/linux/security.h | 13 +++++++++++++ kernel/module.c | 11 +++++++++++ security/capability.c | 6 ++++++ security/security.c | 5 +++++ 4 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 05e88bdcf7d9..0f6afc657f77 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * userspace to load a kernel module with the given name. * @kmod_name name of the module requested by the kernel * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter @@ -1508,6 +1514,7 @@ struct security_operations { int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); @@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); +int security_kernel_module_from_file(struct file *file); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); @@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } +static inline int security_kernel_module_from_file(struct file *file) +{ + return 0; +} + static inline int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) diff --git a/kernel/module.c b/kernel/module.c index 1395ca382fb5..a1d2ed8bab93 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -2485,10 +2486,16 @@ static int elf_header_check(struct load_info *info) static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) { + int err; + info->len = len; if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; + err = security_kernel_module_from_file(NULL); + if (err) + return err; + /* Suck in entire file: we'll want most of it. */ info->hdr = vmalloc(info->len); if (!info->hdr) @@ -2515,6 +2522,10 @@ static int copy_module_from_fd(int fd, struct load_info *info) if (!file) return -ENOEXEC; + err = security_kernel_module_from_file(file); + if (err) + goto out; + err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); if (err) goto out; diff --git a/security/capability.c b/security/capability.c index b14a30c234b8..0fe5a026aef8 100644 --- a/security/capability.c +++ b/security/capability.c @@ -395,6 +395,11 @@ static int cap_kernel_module_request(char *kmod_name) return 0; } +static int cap_kernel_module_from_file(struct file *file) +{ + return 0; +} + static int cap_task_setpgid(struct task_struct *p, pid_t pgid) { return 0; @@ -967,6 +972,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, kernel_act_as); set_to_cap_if_null(ops, kernel_create_files_as); set_to_cap_if_null(ops, kernel_module_request); + set_to_cap_if_null(ops, kernel_module_from_file); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setpgid); set_to_cap_if_null(ops, task_getpgid); diff --git a/security/security.c b/security/security.c index 8dcd4ae10a5f..ce88630de15d 100644 --- a/security/security.c +++ b/security/security.c @@ -820,6 +820,11 @@ int security_kernel_module_request(char *kmod_name) return security_ops->kernel_module_request(kmod_name); } +int security_kernel_module_from_file(struct file *file) +{ + return security_ops->kernel_module_from_file(file); +} + int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { -- cgit v1.2.3 From fdf90729e57812cb12d7938e2dee7c71e875fb08 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 16 Oct 2012 12:40:08 +1030 Subject: ima: support new kernel module syscall With the addition of the new kernel module syscall, which defines two arguments - a file descriptor to the kernel module and a pointer to a NULL terminated string of module arguments - it is now possible to measure and appraise kernel modules like any other file on the file system. This patch adds support to measure and appraise kernel modules in an extensible and consistent manner. To support filesystems without extended attribute support, additional patches could pass the signature as the first parameter. Signed-off-by: Mimi Zohar Signed-off-by: Rusty Russell --- Documentation/ABI/testing/ima_policy | 3 ++- include/linux/ima.h | 6 ++++++ security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_api.c | 4 ++-- security/integrity/ima/ima_main.c | 21 +++++++++++++++++++++ security/integrity/ima/ima_policy.c | 3 +++ security/security.c | 7 ++++++- 7 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 986946613542..ec0a38ef3145 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -23,7 +23,7 @@ Description: lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] - base: func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK] + base: func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK][MODULE_CHECK] mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] fsmagic:= hex value uid:= decimal value @@ -53,6 +53,7 @@ Description: measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC measure func=FILE_CHECK mask=MAY_READ uid=0 + measure func=MODULE_CHECK uid=0 appraise fowner=0 The default policy measures all executables in bprm_check, diff --git a/include/linux/ima.h b/include/linux/ima.h index 2c7223d7e73b..86c361e947b9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -18,6 +18,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); +extern int ima_module_check(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -40,6 +41,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } +static inline int ima_module_check(struct file *file) +{ + return 0; +} + #endif /* CONFIG_IMA_H */ #ifdef CONFIG_IMA_APPRAISE diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 6ee8826662cc..3b2adb794f15 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -127,7 +127,7 @@ struct integrity_iint_cache *integrity_iint_insert(struct inode *inode); struct integrity_iint_cache *integrity_iint_find(struct inode *inode); /* IMA policy related functions */ -enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, POST_SETATTR }; +enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, MODULE_CHECK, POST_SETATTR }; int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask, int flags); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index b356884fb3ef..0cea3db21657 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -100,12 +100,12 @@ err_out: * ima_get_action - appraise & measure decision based on policy. * @inode: pointer to inode to measure * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE) - * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP) + * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP, MODULE_CHECK) * * The policy is defined in terms of keypairs: * subj=, obj=, type=, func=, mask=, fsmagic= * subj,obj, and type: are LSM specific. - * func: FILE_CHECK | BPRM_CHECK | FILE_MMAP + * func: FILE_CHECK | BPRM_CHECK | FILE_MMAP | MODULE_CHECK * mask: contains the permission mask * fsmagic: hex value * diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 73c9a268253e..45de18e9a6f2 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -280,6 +280,27 @@ int ima_file_check(struct file *file, int mask) } EXPORT_SYMBOL_GPL(ima_file_check); +/** + * ima_module_check - based on policy, collect/store/appraise measurement. + * @file: pointer to the file to be measured/appraised + * + * Measure/appraise kernel modules based on policy. + * + * Always return 0 and audit dentry_open failures. + * Return code is based upon measurement appraisal. + */ +int ima_module_check(struct file *file) +{ + int rc; + + if (!file) + rc = INTEGRITY_UNKNOWN; + else + rc = process_measurement(file, file->f_dentry->d_name.name, + MAY_EXEC, MODULE_CHECK); + return (ima_appraise & IMA_APPRAISE_ENFORCE) ? rc : 0; +} + static int __init init_ima(void) { int error; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index c7dacd2eab7a..af7d182d5a46 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -80,6 +80,7 @@ static struct ima_rule_entry default_rules[] = { .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE,.func = MODULE_CHECK, .flags = IMA_FUNC}, }; static struct ima_rule_entry default_appraise_rules[] = { @@ -401,6 +402,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) /* PATH_CHECK is for backwards compat */ else if (strcmp(args[0].from, "PATH_CHECK") == 0) entry->func = FILE_CHECK; + else if (strcmp(args[0].from, "MODULE_CHECK") == 0) + entry->func = MODULE_CHECK; else if (strcmp(args[0].from, "FILE_MMAP") == 0) entry->func = FILE_MMAP; else if (strcmp(args[0].from, "BPRM_CHECK") == 0) diff --git a/security/security.c b/security/security.c index ce88630de15d..daa97f4ac9d1 100644 --- a/security/security.c +++ b/security/security.c @@ -822,7 +822,12 @@ int security_kernel_module_request(char *kmod_name) int security_kernel_module_from_file(struct file *file) { - return security_ops->kernel_module_from_file(file); + int ret; + + ret = security_ops->kernel_module_from_file(file); + if (ret) + return ret; + return ima_module_check(file); } int security_task_fix_setuid(struct cred *new, const struct cred *old, -- cgit v1.2.3 From 6f33d58794ef4cef4b2c706029810f9688bd3026 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Nov 2012 12:30:25 +1030 Subject: __UNIQUE_ID() Jan Beulich points out __COUNTER__ (gcc 4.3 and above), so let's use that to create unique ids. This is better than __LINE__ which we use today, so provide a wrapper. Stanislaw Gruszka reported that some module parameters start with a digit, so we need to prepend when we for the unique id. Signed-off-by: Rusty Russell Acked-by: Jan Beulich --- include/linux/compiler-gcc4.h | 2 ++ include/linux/compiler.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..56c802cba7f6 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -31,6 +31,8 @@ #define __linktime_error(message) __attribute__((__error__(message))) +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + #if __GNUC_MINOR__ >= 5 /* * Mark a position in code as unreachable. This can be used to diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f430e4162f41..5f45335e1ac7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -42,6 +42,10 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __rcu #endif +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) + #ifdef __KERNEL__ #ifdef __GNUC__ @@ -164,6 +168,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); (typeof(ptr)) (__ptr + (off)); }) #endif +/* Not-quite-unique ID. */ +#ifndef __UNIQUE_ID +# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) +#endif + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 34182eea36fc1d70d748b0947c873314980ba806 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Nov 2012 12:30:25 +1030 Subject: moduleparam: use __UNIQUE_ID() Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index d6a58065c09c..137b4198fc03 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -16,17 +16,15 @@ /* Chosen so that structs with an unsigned long line up. */ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -#define ___module_cat(a,b) __mod_ ## a ## b -#define __module_cat(a,b) ___module_cat(a,b) #ifdef MODULE #define __MODULE_INFO(tag, name, info) \ -static const char __module_cat(name,__LINE__)[] \ +static const char __UNIQUE_ID(name)[] \ __used __attribute__((section(".modinfo"), unused, aligned(1))) \ = __stringify(tag) "=" info #else /* !MODULE */ /* This struct is here for syntactic coherency, it is not used */ #define __MODULE_INFO(tag, name, info) \ - struct __module_cat(name,__LINE__) {} + struct __UNIQUE_ID(name) {} #endif #define __MODULE_PARM_TYPE(name, _type) \ __MODULE_INFO(parmtype, name##type, #name ":" _type) -- cgit v1.2.3 From facc0a6bd494ce21e31b34fc355ecf702518272b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 11:55:28 +1030 Subject: ASN.1: Define indefinite length marker constant Define a constant to hold the marker value seen in an indefinite-length element. Signed-off-by: David Howells Signed-off-by: Rusty Russell --- include/linux/asn1.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/asn1.h b/include/linux/asn1.h index 5c3f4e4b9a23..eed6982860ba 100644 --- a/include/linux/asn1.h +++ b/include/linux/asn1.h @@ -64,4 +64,6 @@ enum asn1_tag { ASN1_LONG_TAG = 31 /* Long form tag */ }; +#define ASN1_INDEFINITE_LENGTH 0x80 + #endif /* _LINUX_ASN1_H */ -- cgit v1.2.3 From 8349e5aeaadd160b7cce554a62a05be4b2d894aa Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 14 Dec 2012 09:34:01 -0500 Subject: Revert "libata: check SATA_SETTINGS log with HW Feature Ctrl" This reverts commit de90cd71f68e947d3bd6c3f2ef5731ead010a768. Shane Huang writes: Please suspend this patch because I just received two new DevSlp drives but found word 78 bit 5 is _not_ set. I'm checking with the drive vendor whether he gave me the wrong information. If bit 5 is not the necessary and sufficient condition, I will implement another patch to replace ata_device->sata_settings into ->devslp_timing. Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 3 ++- include/linux/ata.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 47d59616fe3d..9e8b99af400d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2331,8 +2331,9 @@ int ata_dev_configure(struct ata_device *dev) /* Obtain SATA Settings page from Identify Device Data Log, * which contains DevSlp timing variables etc. + * Exclude old devices with ata_id_has_ncq() */ - if (ata_id_has_hw_feature_ctrl(dev->id)) { + if (ata_id_has_ncq(dev->id)) { err_mask = ata_read_log_page(dev, ATA_LOG_SATA_ID_DEV_DATA, ATA_LOG_SATA_SETTINGS, diff --git a/include/linux/ata.h b/include/linux/ata.h index 18cbb93fdbca..408da9502177 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -593,7 +593,6 @@ static inline int ata_is_data(u8 prot) #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_hw_feature_ctrl(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 5)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3 From 8dd2cb7e880d2f77fba53b523c99133ad5054cfd Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 14 Dec 2012 11:15:36 +0800 Subject: block: discard granularity might not be power of 2 In MD raid case, discard granularity might not be power of 2, for example, a 4-disk raid5 has 3*chunk_size discard granularity. Correct the calculation for such cases. Reported-by: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-lib.c | 23 +++++++++++++---------- block/blk-settings.c | 6 +++--- include/linux/blkdev.h | 7 ++++--- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/block/blk-lib.c b/block/blk-lib.c index 9373b58dfab1..5677fd33d7d2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -43,8 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; - unsigned int max_discard_sectors; - unsigned int granularity, alignment, mask; + sector_t max_discard_sectors; + sector_t granularity, alignment; struct bio_batch bb; struct bio *bio; int ret = 0; @@ -57,15 +57,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); - mask = granularity - 1; - alignment = (bdev_discard_alignment(bdev) >> 9) & mask; + alignment = bdev_discard_alignment(bdev) >> 9; + alignment = sector_div(alignment, granularity); /* * Ensure that max_discard_sectors is of the proper * granularity, so that requests stay aligned after a split. */ max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); - max_discard_sectors = round_down(max_discard_sectors, granularity); + sector_div(max_discard_sectors, granularity); + max_discard_sectors *= granularity; if (unlikely(!max_discard_sectors)) { /* Avoid infinite loop below. Being cautious never hurts. */ return -EOPNOTSUPP; @@ -83,7 +84,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, while (nr_sects) { unsigned int req_sects; - sector_t end_sect; + sector_t end_sect, tmp; bio = bio_alloc(gfp_mask, 1); if (!bio) { @@ -98,10 +99,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * misaligned, stop the discard at the previous aligned sector. */ end_sect = sector + req_sects; - if (req_sects < nr_sects && (end_sect & mask) != alignment) { - end_sect = - round_down(end_sect - alignment, granularity) - + alignment; + tmp = end_sect; + if (req_sects < nr_sects && + sector_div(tmp, granularity) != alignment) { + end_sect = end_sect - alignment; + sector_div(end_sect, granularity); + end_sect = end_sect * granularity + alignment; req_sects = end_sect - sector; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 779bb7646bcd..c50ecf0ea3b1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, bottom = b->discard_granularity + alignment; /* Verify that top and bottom intervals line up */ - if (max(top, bottom) & (min(top, bottom) - 1)) + if ((max(top, bottom) % min(top, bottom)) != 0) t->discard_misaligned = 1; } @@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); - t->discard_alignment = lcm(t->discard_alignment, alignment) & - (t->discard_granularity - 1); + t->discard_alignment = lcm(t->discard_alignment, alignment) % + t->discard_granularity; } return ret; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c9d233e727f2..acb4f7bbbd32 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1188,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + sector_t alignment = sector << 9; + alignment = sector_div(alignment, lim->discard_granularity); if (!lim->max_discard_sectors) return 0; - return (lim->discard_granularity + lim->discard_alignment - alignment) - & (lim->discard_granularity - 1); + alignment = lim->discard_granularity + lim->discard_alignment - alignment; + return sector_div(alignment, lim->discard_granularity); } static inline int bdev_discard_alignment(struct block_device *bdev) -- cgit v1.2.3 From 63b68901dfd590cc13d4fe5c08dec2ca75b3c4aa Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 14 Dec 2012 09:09:11 -0800 Subject: mfd: omap-usb-host: get rid of cpu_is_omap..() macros Instead of using cpu_is_omap..() macros in the device driver we rely on information provided in the platform data. The only information we need is whether the USB Host module has a single ULPI bypass control bit for all ports or individual bypass control bits for each port. OMAP3 REV2.1 and earlier have the former. Signed-off-by: Roger Quadros Acked-by: Samuel Ortiz [tony@atomide.com: updated to remove plat/cpu.h] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/usb-host.c | 4 ++++ drivers/mfd/omap-usb-host.c | 3 +-- include/linux/platform_data/usb-omap.h | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c index d1dbe125b34f..2e44e8a22884 100644 --- a/arch/arm/mach-omap2/usb-host.c +++ b/arch/arm/mach-omap2/usb-host.c @@ -508,6 +508,10 @@ void __init usbhs_init(const struct usbhs_omap_board_data *pdata) if (cpu_is_omap34xx()) { setup_ehci_io_mux(pdata->port_mode); setup_ohci_io_mux(pdata->port_mode); + + if (omap_rev() <= OMAP3430_REV_ES2_1) + usbhs_data.single_ulpi_bypass = true; + } else if (cpu_is_omap44xx()) { setup_4430ehci_io_mux(pdata->port_mode); setup_4430ohci_io_mux(pdata->port_mode); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 770a0d01e0b9..05164d7f054b 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -384,7 +383,7 @@ static void omap_usbhs_init(struct device *dev) reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS; /* Bypass the TLL module for PHY mode operation */ - if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) { + if (pdata->single_ulpi_bypass) { dev_dbg(dev, "OMAP3 ES version <= ES2.1\n"); if (is_ehci_phy_mode(pdata->port_mode[0]) || is_ehci_phy_mode(pdata->port_mode[1]) || diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index 8570bcfe6311..ef65b67c56c3 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -59,6 +59,9 @@ struct usbhs_omap_platform_data { struct ehci_hcd_omap_platform_data *ehci_data; struct ohci_hcd_omap_platform_data *ohci_data; + + /* OMAP3 <= ES2.1 have a single ulpi bypass control bit */ + unsigned single_ulpi_bypass:1; }; /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From d9ba573718666df2e7e30d671f81bba39d07f91c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 14 Dec 2012 09:09:11 -0800 Subject: ARM: OMAP: Move plat/omap-serial.h to include/linux/platform_data/serial-omap.h We need to move this file to allow ARM multiplatform configurations to build for omap2+. This can now be done as this file now only contains platform_data. cc: Russell King cc: Alan Cox cc: Greg Kroah-Hartman cc: Govindraj.R cc: Kevin Hilman cc: linux-serial@vger.kernel.org Reviewed-by: Felipe Balbi Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/serial.c | 3 +- arch/arm/plat-omap/include/plat/omap-serial.h | 51 --------------------------- drivers/tty/serial/omap-serial.c | 3 +- include/linux/platform_data/serial-omap.h | 51 +++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 55 deletions(-) delete mode 100644 arch/arm/plat-omap/include/plat/omap-serial.h create mode 100644 include/linux/platform_data/serial-omap.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 93d102535c85..04fdbc4c499b 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -27,8 +27,7 @@ #include #include #include - -#include +#include #include "common.h" #include "omap_hwmod.h" diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h deleted file mode 100644 index ff9b0aab5281..000000000000 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Driver for OMAP-UART controller. - * Based on drivers/serial/8250.c - * - * Copyright (C) 2010 Texas Instruments. - * - * Authors: - * Govindraj R - * Thara Gopinath - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __OMAP_SERIAL_H__ -#define __OMAP_SERIAL_H__ - -#include -#include -#include - -#define DRIVER_NAME "omap_uart" - -/* - * Use tty device name as ttyO, [O -> OMAP] - * in bootargs we specify as console=ttyO0 if uart1 - * is used as console uart. - */ -#define OMAP_SERIAL_NAME "ttyO" - -struct omap_uart_port_info { - bool dma_enabled; /* To specify DMA Mode */ - unsigned int uartclk; /* UART clock rate */ - upf_t flags; /* UPF_* flags */ - unsigned int dma_rx_buf_size; - unsigned int dma_rx_timeout; - unsigned int autosuspend_timeout; - unsigned int dma_rx_poll_rate; - int DTR_gpio; - int DTR_inverted; - int DTR_present; - - int (*get_context_loss_count)(struct device *); - void (*set_forceidle)(struct device *); - void (*set_noidle)(struct device *); - void (*enable_wakeup)(struct device *, bool); -}; - -#endif /* __OMAP_SERIAL_H__ */ diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 23f797eb7a28..57d6b29c039c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -41,8 +41,7 @@ #include #include #include - -#include +#include #define OMAP_MAX_HSUART_PORTS 6 diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h new file mode 100644 index 000000000000..ff9b0aab5281 --- /dev/null +++ b/include/linux/platform_data/serial-omap.h @@ -0,0 +1,51 @@ +/* + * Driver for OMAP-UART controller. + * Based on drivers/serial/8250.c + * + * Copyright (C) 2010 Texas Instruments. + * + * Authors: + * Govindraj R + * Thara Gopinath + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __OMAP_SERIAL_H__ +#define __OMAP_SERIAL_H__ + +#include +#include +#include + +#define DRIVER_NAME "omap_uart" + +/* + * Use tty device name as ttyO, [O -> OMAP] + * in bootargs we specify as console=ttyO0 if uart1 + * is used as console uart. + */ +#define OMAP_SERIAL_NAME "ttyO" + +struct omap_uart_port_info { + bool dma_enabled; /* To specify DMA Mode */ + unsigned int uartclk; /* UART clock rate */ + upf_t flags; /* UPF_* flags */ + unsigned int dma_rx_buf_size; + unsigned int dma_rx_timeout; + unsigned int autosuspend_timeout; + unsigned int dma_rx_poll_rate; + int DTR_gpio; + int DTR_inverted; + int DTR_present; + + int (*get_context_loss_count)(struct device *); + void (*set_forceidle)(struct device *); + void (*set_noidle)(struct device *); + void (*enable_wakeup)(struct device *, bool); +}; + +#endif /* __OMAP_SERIAL_H__ */ -- cgit v1.2.3 From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 15:21:52 -0500 Subject: NFSv4.1: Move the RPC timestamp out of the slot. Shave a few bytes off the slot table size by moving the RPC timestamp into the sequence results. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 +++++++------- fs/nfs/nfs4session.c | 3 +-- fs/nfs/nfs4session.h | 1 - include/linux/nfs_xdr.h | 1 + 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9003b8f6b77f..afb428e63b52 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -419,7 +419,6 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * { struct nfs4_session *session; struct nfs4_slot *slot; - unsigned long timestamp; struct nfs_client *clp; int ret = 1; @@ -444,9 +443,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * case 0: /* Update the slot's sequence and clientid lease timer */ ++slot->seq_nr; - timestamp = slot->renewal_time; clp = session->clp; - do_renew_lease(clp, timestamp); + do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ if (res->sr_status_flags != 0) nfs4_schedule_lease_recovery(clp); @@ -473,10 +471,11 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! */ - if (slot->seq_nr == 1) - break; - slot->seq_nr = 1; - goto retry_nowait; + if (slot->seq_nr != 1) { + slot->seq_nr = 1; + goto retry_nowait; + } + break; case -NFS4ERR_SEQ_FALSE_RETRY: ++slot->seq_nr; goto retry_nowait; @@ -567,6 +566,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, slot->slot_nr, slot->seq_nr); res->sr_slot = slot; + res->sr_timestamp = jiffies; res->sr_status_flags = 0; /* * sr_status is only set in decode_sequence, and so will remain diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 1e6c87c443a7..0e1cc1f4e51a 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -143,7 +143,6 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) if (slotid > tbl->highest_used_slotid || tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; - ret->renewal_time = jiffies; ret->generation = tbl->generation; out: @@ -228,9 +227,9 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) if (nfs4_session_draining(tbl->session) && !args->sa_privileged) return false; - slot->renewal_time = jiffies; slot->generation = tbl->generation; args->sa_slot = slot; + res->sr_timestamp = jiffies; res->sr_slot = slot; res->sr_status_flags = 0; res->sr_status = 1; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 04f834cab16c..d17b08091d4b 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -19,7 +19,6 @@ struct nfs4_slot { struct nfs4_slot_table *table; struct nfs4_slot *next; unsigned long generation; - unsigned long renewal_time; u32 slot_nr; u32 seq_nr; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a55abd499c21..29adb12c7ecf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -194,6 +194,7 @@ struct nfs4_sequence_args { struct nfs4_sequence_res { struct nfs4_slot *sr_slot; /* slot used to send request */ + unsigned long sr_timestamp; int sr_status; /* sequence operation status */ u32 sr_status_flags; u32 sr_highest_slotid; -- cgit v1.2.3 From 11520e5e7c1855fc3bf202bb3be35a39d9efa034 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 15 Dec 2012 15:15:24 -0800 Subject: Revert "x86-64/efi: Use EFI to deal with platform wall clock (again)" This reverts commit bd52276fa1d4 ("x86-64/efi: Use EFI to deal with platform wall clock (again)"), and the two supporting commits: da5a108d05b4: "x86/kernel: remove tboot 1:1 page table creation code" 185034e72d59: "x86, efi: 1:1 pagetable mapping for virtual EFI calls") as they all depend semantically on commit 53b87cf088e2 ("x86, mm: Include the entire kernel memory map in trampoline_pgd") that got reverted earlier due to the problems it caused. This was pointed out by Yinghai Lu, and verified by me on my Macbook Air that uses EFI. Pointed-out-by: Yinghai Lu Signed-off-by: Linus Torvalds --- arch/x86/include/asm/efi.h | 28 ++++----------- arch/x86/kernel/tboot.c | 78 +++++++++++++++++++++++++++++++++++++++--- arch/x86/mm/pageattr.c | 10 +++--- arch/x86/platform/efi/efi.c | 30 +++++++++++++--- arch/x86/platform/efi/efi_64.c | 15 -------- include/linux/efi.h | 2 ++ init/main.c | 8 ++--- 7 files changed, 116 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index fd13815fe85c..6e8fdf5ad113 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -69,37 +69,23 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ (u64)(a4), (u64)(a5), (u64)(a6)) -extern unsigned long efi_call_virt_prelog(void); -extern void efi_call_virt_epilog(unsigned long); - -#define efi_callx(x, func, ...) \ - ({ \ - efi_status_t __status; \ - unsigned long __pgd; \ - \ - __pgd = efi_call_virt_prelog(); \ - __status = efi_call##x(func, __VA_ARGS__); \ - efi_call_virt_epilog(__pgd); \ - __status; \ - }) - #define efi_call_virt0(f) \ - efi_callx(0, (void *)(efi.systab->runtime->f)) + efi_call0((void *)(efi.systab->runtime->f)) #define efi_call_virt1(f, a1) \ - efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1)) + efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) #define efi_call_virt2(f, a1, a2) \ - efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) + efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) #define efi_call_virt3(f, a1, a2, a3) \ - efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3)) #define efi_call_virt4(f, a1, a2, a3, a4) \ - efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4)) #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ - efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5)) #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ - efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index d4f460f962ee..f84fe00fad48 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -103,13 +103,71 @@ void __init tboot_probe(void) pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); } +static pgd_t *tboot_pg_dir; +static struct mm_struct tboot_mm = { + .mm_rb = RB_ROOT, + .pgd = swapper_pg_dir, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(init_mm.mmlist), +}; + static inline void switch_to_tboot_pt(void) { -#ifdef CONFIG_X86_32 - load_cr3(initial_page_table); -#else - write_cr3(real_mode_header->trampoline_pgd); -#endif + write_cr3(virt_to_phys(tboot_pg_dir)); +} + +static int map_tboot_page(unsigned long vaddr, unsigned long pfn, + pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(&tboot_mm, vaddr); + pud = pud_alloc(&tboot_mm, pgd, vaddr); + if (!pud) + return -1; + pmd = pmd_alloc(&tboot_mm, pud, vaddr); + if (!pmd) + return -1; + pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); + if (!pte) + return -1; + set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); + pte_unmap(pte); + return 0; +} + +static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, + unsigned long nr) +{ + /* Reuse the original kernel mapping */ + tboot_pg_dir = pgd_alloc(&tboot_mm); + if (!tboot_pg_dir) + return -1; + + for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { + if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) + return -1; + } + + return 0; +} + +static void tboot_create_trampoline(void) +{ + u32 map_base, map_size; + + /* Create identity map for tboot shutdown code. */ + map_base = PFN_DOWN(tboot->tboot_base); + map_size = PFN_UP(tboot->tboot_size); + if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) + panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", + map_base, map_size); } #ifdef CONFIG_ACPI_SLEEP @@ -167,6 +225,14 @@ void tboot_shutdown(u32 shutdown_type) if (!tboot_enabled()) return; + /* + * if we're being called before the 1:1 mapping is set up then just + * return and let the normal shutdown happen; this should only be + * due to very early panic() + */ + if (!tboot_pg_dir) + return; + /* if this is S3 then set regions to MAC */ if (shutdown_type == TB_SHUTDOWN_S3) if (tboot_setup_sleep()) @@ -277,6 +343,8 @@ static __init int tboot_late_init(void) if (!tboot_enabled()) return 0; + tboot_create_trampoline(); + atomic_set(&ap_wfs_count, 0); register_hotcpu_notifier(&tboot_cpu_notifier); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 931930a96160..a718e0d23503 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, /* * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it, in the - * error case, and during early boot (for EFI) we fall back - * to cpa_flush_all (which uses wbinvd): + * avoid the wbindv. If the CPU does not support it and in the + * error case we fall back to cpa_flush_all (which uses + * wbindv): */ - if (early_boot_irqs_disabled) - __cpa_flush_all((void *)(long)cache); - else if (!ret && cpu_has_clflush) { + if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { cpa_flush_array(addr, numpages, cache, cpa.flags, pages); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0a34d9e9c263..ad4439145f85 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -239,7 +239,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -static int efi_set_rtc_mmss(unsigned long nowtime) +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, + efi_time_cap_t *tc) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + efi_call_phys_prelog(); + status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), + virt_to_phys(tc)); + efi_call_phys_epilog(); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +int efi_set_rtc_mmss(unsigned long nowtime) { int real_seconds, real_minutes; efi_status_t status; @@ -268,7 +283,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -static unsigned long efi_get_time(void) +unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; @@ -624,13 +639,18 @@ static int __init efi_runtime_init(void) } /* * We will only need *early* access to the following - * EFI runtime service before set_virtual_address_map + * two EFI runtime services before set_virtual_address_map * is invoked. */ + efi_phys.get_time = (efi_get_time_t *)runtime->get_time; efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) runtime->set_virtual_address_map; - + /* + * Make efi_get_time can be called before entering + * virtual mode. + */ + efi.get_time = phys_efi_get_time; early_iounmap(runtime, sizeof(efi_runtime_services_t)); return 0; @@ -716,10 +736,12 @@ void __init efi_init(void) efi_enabled = 0; return; } +#ifdef CONFIG_X86_32 if (efi_is_native()) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } +#endif #if EFI_DEBUG print_efi_memmap(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 06c8b2e662ab..95fd505dfeb6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -58,21 +58,6 @@ static void __init early_code_mapping_set_exec(int executable) } } -unsigned long efi_call_virt_prelog(void) -{ - unsigned long saved; - - saved = read_cr3(); - write_cr3(real_mode_header->trampoline_pgd); - - return saved; -} - -void efi_call_virt_epilog(unsigned long saved) -{ - write_cr3(saved); -} - void __init efi_call_phys_prelog(void) { unsigned long vaddress; diff --git a/include/linux/efi.h b/include/linux/efi.h index 02a69418be18..8b84916dc671 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -587,6 +587,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); +extern unsigned long efi_get_time(void); +extern int efi_set_rtc_mmss(unsigned long nowtime); extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; diff --git a/init/main.c b/init/main.c index 6af5470b8067..63ae904a99a8 100644 --- a/init/main.c +++ b/init/main.c @@ -463,10 +463,6 @@ static void __init mm_init(void) percpu_init_late(); pgtable_cache_init(); vmalloc_init(); -#ifdef CONFIG_X86 - if (efi_enabled) - efi_enter_virtual_mode(); -#endif } asmlinkage void __init start_kernel(void) @@ -607,6 +603,10 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); anon_vma_init(); +#ifdef CONFIG_X86 + if (efi_enabled) + efi_enter_virtual_mode(); +#endif thread_info_cache_init(); cred_init(); fork_init(totalram_pages); -- cgit v1.2.3 From 9360b53661a2c7754517b2925580055bacc8ec38 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 17 Dec 2012 11:29:09 -0800 Subject: Revert "bdi: add a user-tunable cpu_list for the bdi flusher threads" This reverts commit 8fa72d234da9b6b473bbb1f74d533663e4996e6b. People disagree about how this should be done, so let's revert this for now so that nobody starts using the new tuning interface. Tejun is thinking about a more generic interface for thread pool affinity. Requested-by: Tejun Heo Acked-by: Jeff Moyer Acked-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 4 --- mm/backing-dev.c | 84 --------------------------------------------- 2 files changed, 88 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 238521a19849..2a9a9abc9126 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,7 +18,6 @@ #include #include #include -#include struct page; struct device; @@ -106,9 +105,6 @@ struct backing_dev_info { struct timer_list laptop_mode_wb_timer; - cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */ - struct mutex flusher_cpumask_lock; - #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; struct dentry *debug_stats; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index bd6a6cabef71..d3ca2b3ee176 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,7 +10,6 @@ #include #include #include -#include #include static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); @@ -222,63 +221,12 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) -static ssize_t cpu_list_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct backing_dev_info *bdi = dev_get_drvdata(dev); - struct bdi_writeback *wb = &bdi->wb; - cpumask_var_t newmask; - ssize_t ret; - struct task_struct *task; - - if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) - return -ENOMEM; - - ret = cpulist_parse(buf, newmask); - if (!ret) { - spin_lock_bh(&bdi->wb_lock); - task = wb->task; - if (task) - get_task_struct(task); - spin_unlock_bh(&bdi->wb_lock); - - mutex_lock(&bdi->flusher_cpumask_lock); - if (task) { - ret = set_cpus_allowed_ptr(task, newmask); - put_task_struct(task); - } - if (ret == 0) { - cpumask_copy(bdi->flusher_cpumask, newmask); - ret = count; - } - mutex_unlock(&bdi->flusher_cpumask_lock); - - } - free_cpumask_var(newmask); - - return ret; -} - -static ssize_t cpu_list_show(struct device *dev, - struct device_attribute *attr, char *page) -{ - struct backing_dev_info *bdi = dev_get_drvdata(dev); - ssize_t ret; - - mutex_lock(&bdi->flusher_cpumask_lock); - ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask); - mutex_unlock(&bdi->flusher_cpumask_lock); - - return ret; -} - #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), - __ATTR_RW(cpu_list), __ATTR_NULL, }; @@ -480,7 +428,6 @@ static int bdi_forker_thread(void *ptr) writeback_inodes_wb(&bdi->wb, 1024, WB_REASON_FORKER_THREAD); } else { - int ret; /* * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. @@ -490,14 +437,6 @@ static int bdi_forker_thread(void *ptr) spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; spin_unlock_bh(&bdi->wb_lock); - mutex_lock(&bdi->flusher_cpumask_lock); - ret = set_cpus_allowed_ptr(task, - bdi->flusher_cpumask); - mutex_unlock(&bdi->flusher_cpumask_lock); - if (ret) - printk_once("%s: failed to bind flusher" - " thread %s, error %d\n", - __func__, task->comm, ret); wake_up_process(task); } bdi_clear_pending(bdi); @@ -570,17 +509,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, dev_name(dev)); if (IS_ERR(wb->task)) return PTR_ERR(wb->task); - } else { - int node; - /* - * Set up a default cpumask for the flusher threads that - * includes all cpus on the same numa node as the device. - * The mask may be overridden via sysfs. - */ - node = dev_to_node(bdi->dev); - if (node != NUMA_NO_NODE) - cpumask_copy(bdi->flusher_cpumask, - cpumask_of_node(node)); } bdi_debug_register(bdi, dev_name(dev)); @@ -706,15 +634,6 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); - if (!bdi_cap_flush_forker(bdi)) { - bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); - if (!bdi->flusher_cpumask) - return -ENOMEM; - cpumask_setall(bdi->flusher_cpumask); - mutex_init(&bdi->flusher_cpumask_lock); - } else - bdi->flusher_cpumask = NULL; - for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init(&bdi->bdi_stat[i], 0); if (err) @@ -737,7 +656,6 @@ int bdi_init(struct backing_dev_info *bdi) err: while (i--) percpu_counter_destroy(&bdi->bdi_stat[i]); - kfree(bdi->flusher_cpumask); } return err; @@ -765,8 +683,6 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); - kfree(bdi->flusher_cpumask); - /* * If bdi_unregister() had already been called earlier, the * wakeup_timer could still be armed because bdi_prune_sb() -- cgit v1.2.3 From 7929d407e47fbf843fe1337fd95ed57785ae5e9d Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Mon, 17 Dec 2012 15:59:32 -0800 Subject: include/linux/init.h: use the stringify operator for the __define_initcall macro Currently the __define_initcall() macro takes three arguments, fn, id and level. The level argument is exactly the same as the id argument but wrapped in quotes. To overcome this need to specify three arguments to the __define_initcall macro, where one argument is the stringification of another, we can just use the stringification macro instead. Signed-off-by: Matthew Leach Cc: Benjamin Herrenschmidt Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/machdep.h | 36 +++++++++++++++++----------------- include/linux/init.h | 40 +++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index c4231973edd3..3c82daf8be99 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -320,28 +320,28 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal) ppc_md.log_error(buf, err_type, fatal); } -#define __define_machine_initcall(mach,level,fn,id) \ +#define __define_machine_initcall(mach, fn, id) \ static int __init __machine_initcall_##mach##_##fn(void) { \ if (machine_is(mach)) return fn(); \ return 0; \ } \ - __define_initcall(level,__machine_initcall_##mach##_##fn,id); - -#define machine_core_initcall(mach,fn) __define_machine_initcall(mach,"1",fn,1) -#define machine_core_initcall_sync(mach,fn) __define_machine_initcall(mach,"1s",fn,1s) -#define machine_postcore_initcall(mach,fn) __define_machine_initcall(mach,"2",fn,2) -#define machine_postcore_initcall_sync(mach,fn) __define_machine_initcall(mach,"2s",fn,2s) -#define machine_arch_initcall(mach,fn) __define_machine_initcall(mach,"3",fn,3) -#define machine_arch_initcall_sync(mach,fn) __define_machine_initcall(mach,"3s",fn,3s) -#define machine_subsys_initcall(mach,fn) __define_machine_initcall(mach,"4",fn,4) -#define machine_subsys_initcall_sync(mach,fn) __define_machine_initcall(mach,"4s",fn,4s) -#define machine_fs_initcall(mach,fn) __define_machine_initcall(mach,"5",fn,5) -#define machine_fs_initcall_sync(mach,fn) __define_machine_initcall(mach,"5s",fn,5s) -#define machine_rootfs_initcall(mach,fn) __define_machine_initcall(mach,"rootfs",fn,rootfs) -#define machine_device_initcall(mach,fn) __define_machine_initcall(mach,"6",fn,6) -#define machine_device_initcall_sync(mach,fn) __define_machine_initcall(mach,"6s",fn,6s) -#define machine_late_initcall(mach,fn) __define_machine_initcall(mach,"7",fn,7) -#define machine_late_initcall_sync(mach,fn) __define_machine_initcall(mach,"7s",fn,7s) + __define_initcall(__machine_initcall_##mach##_##fn, id); + +#define machine_core_initcall(mach, fn) __define_machine_initcall(mach, fn, 1) +#define machine_core_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 1s) +#define machine_postcore_initcall(mach, fn) __define_machine_initcall(mach, fn, 2) +#define machine_postcore_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 2s) +#define machine_arch_initcall(mach, fn) __define_machine_initcall(mach, fn, 3) +#define machine_arch_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 3s) +#define machine_subsys_initcall(mach, fn) __define_machine_initcall(mach, fn, 4) +#define machine_subsys_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 4s) +#define machine_fs_initcall(mach, fn) __define_machine_initcall(mach, fn, 5) +#define machine_fs_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 5s) +#define machine_rootfs_initcall(mach, fn) __define_machine_initcall(mach, fn, rootfs) +#define machine_device_initcall(mach, fn) __define_machine_initcall(mach, fn, 6) +#define machine_device_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 6s) +#define machine_late_initcall(mach, fn) __define_machine_initcall(mach, fn, 7) +#define machine_late_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 7s) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MACHDEP_H */ diff --git a/include/linux/init.h b/include/linux/init.h index f63692d6902e..a799273714ac 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -182,16 +182,16 @@ extern bool initcall_debug; * can point at the same handler without causing duplicate-symbol build errors. */ -#define __define_initcall(level,fn,id) \ +#define __define_initcall(fn, id) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" level ".init"))) = fn + __attribute__((__section__(".initcall" #id ".init"))) = fn /* * Early initcalls run before initializing SMP. * * Only for built-in code, not modules. */ -#define early_initcall(fn) __define_initcall("early",fn,early) +#define early_initcall(fn) __define_initcall(fn, early) /* * A "pure" initcall has no dependencies on anything else, and purely @@ -200,23 +200,23 @@ extern bool initcall_debug; * This only exists for built-in code, not for modules. * Keep main.c:initcall_level_names[] in sync. */ -#define pure_initcall(fn) __define_initcall("0",fn,0) - -#define core_initcall(fn) __define_initcall("1",fn,1) -#define core_initcall_sync(fn) __define_initcall("1s",fn,1s) -#define postcore_initcall(fn) __define_initcall("2",fn,2) -#define postcore_initcall_sync(fn) __define_initcall("2s",fn,2s) -#define arch_initcall(fn) __define_initcall("3",fn,3) -#define arch_initcall_sync(fn) __define_initcall("3s",fn,3s) -#define subsys_initcall(fn) __define_initcall("4",fn,4) -#define subsys_initcall_sync(fn) __define_initcall("4s",fn,4s) -#define fs_initcall(fn) __define_initcall("5",fn,5) -#define fs_initcall_sync(fn) __define_initcall("5s",fn,5s) -#define rootfs_initcall(fn) __define_initcall("rootfs",fn,rootfs) -#define device_initcall(fn) __define_initcall("6",fn,6) -#define device_initcall_sync(fn) __define_initcall("6s",fn,6s) -#define late_initcall(fn) __define_initcall("7",fn,7) -#define late_initcall_sync(fn) __define_initcall("7s",fn,7s) +#define pure_initcall(fn) __define_initcall(fn, 0) + +#define core_initcall(fn) __define_initcall(fn, 1) +#define core_initcall_sync(fn) __define_initcall(fn, 1s) +#define postcore_initcall(fn) __define_initcall(fn, 2) +#define postcore_initcall_sync(fn) __define_initcall(fn, 2s) +#define arch_initcall(fn) __define_initcall(fn, 3) +#define arch_initcall_sync(fn) __define_initcall(fn, 3s) +#define subsys_initcall(fn) __define_initcall(fn, 4) +#define subsys_initcall_sync(fn) __define_initcall(fn, 4s) +#define fs_initcall(fn) __define_initcall(fn, 5) +#define fs_initcall_sync(fn) __define_initcall(fn, 5s) +#define rootfs_initcall(fn) __define_initcall(fn, rootfs) +#define device_initcall(fn) __define_initcall(fn, 6) +#define device_initcall_sync(fn) __define_initcall(fn, 6s) +#define late_initcall(fn) __define_initcall(fn, 7) +#define late_initcall_sync(fn) __define_initcall(fn, 7s) #define __initcall(fn) device_initcall(fn) -- cgit v1.2.3 From 965c8e59cfcf845ecde2265a1d1bfee5f011d302 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 17 Dec 2012 15:59:39 -0800 Subject: lseek: the "whence" argument is called "whence" But the kernel decided to call it "origin" instead. Fix most of the sites. Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bad_inode.c | 2 +- fs/block_dev.c | 4 ++-- fs/btrfs/file.c | 16 ++++++++-------- fs/ceph/dir.c | 4 ++-- fs/ceph/file.c | 6 +++--- fs/cifs/cifsfs.c | 8 ++++---- fs/configfs/dir.c | 4 ++-- fs/ext3/dir.c | 6 +++--- fs/ext4/dir.c | 6 +++--- fs/ext4/file.c | 22 +++++++++++----------- fs/fuse/file.c | 8 ++++---- fs/gfs2/file.c | 10 +++++----- fs/libfs.c | 4 ++-- fs/nfs/dir.c | 6 +++--- fs/nfs/file.c | 10 +++++----- fs/ocfs2/extent_map.c | 12 ++++++------ fs/ocfs2/file.c | 6 +++--- fs/pstore/inode.c | 6 +++--- fs/read_write.c | 40 ++++++++++++++++++++-------------------- fs/seq_file.c | 4 ++-- fs/ubifs/dir.c | 4 ++-- include/linux/fs.h | 12 ++++++------ include/linux/ftrace.h | 4 ++-- include/linux/syscalls.h | 4 ++-- kernel/trace/ftrace.c | 4 ++-- mm/shmem.c | 20 ++++++++++---------- 26 files changed, 116 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/fs/bad_inode.c b/fs/bad_inode.c index b1342ffb3cf6..922ad460bff9 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -16,7 +16,7 @@ #include -static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t bad_file_llseek(struct file *file, loff_t offset, int whence) { return -EIO; } diff --git a/fs/block_dev.c b/fs/block_dev.c index ab3a456f6650..172f8491a2bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -321,7 +321,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, * for a block special file file->f_path.dentry->d_inode->i_size is zero * so we compute the size by hand (just as in block_read/write above) */ -static loff_t block_llseek(struct file *file, loff_t offset, int origin) +static loff_t block_llseek(struct file *file, loff_t offset, int whence) { struct inode *bd_inode = file->f_mapping->host; loff_t size; @@ -331,7 +331,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) size = i_size_read(bd_inode); retval = -EINVAL; - switch (origin) { + switch (whence) { case SEEK_END: offset += size; break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a8ee75cb96ee..9c6673a9231f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2120,7 +2120,7 @@ out: return ret; } -static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) +static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) { struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map *em; @@ -2154,7 +2154,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) * before the position we want in case there is outstanding delalloc * going on here. */ - if (origin == SEEK_HOLE && start != 0) { + if (whence == SEEK_HOLE && start != 0) { if (start <= root->sectorsize) em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, root->sectorsize, 0); @@ -2188,13 +2188,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) } } - if (origin == SEEK_HOLE) { + if (whence == SEEK_HOLE) { *offset = start; free_extent_map(em); break; } } else { - if (origin == SEEK_DATA) { + if (whence == SEEK_DATA) { if (em->block_start == EXTENT_MAP_DELALLOC) { if (start >= inode->i_size) { free_extent_map(em); @@ -2231,16 +2231,16 @@ out: return ret; } -static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int ret; mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case SEEK_END: case SEEK_CUR: - offset = generic_file_llseek(file, offset, origin); + offset = generic_file_llseek(file, offset, whence); goto out; case SEEK_DATA: case SEEK_HOLE: @@ -2249,7 +2249,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) return -ENXIO; } - ret = find_desired_extent(inode, &offset, origin); + ret = find_desired_extent(inode, &offset, whence); if (ret) { mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e5b77319c97b..8c1aabe93b67 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -454,7 +454,7 @@ static void reset_readdir(struct ceph_file_info *fi) fi->flags &= ~CEPH_F_ATEND; } -static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) +static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { struct ceph_file_info *fi = file->private_data; struct inode *inode = file->f_mapping->host; @@ -463,7 +463,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&inode->i_mutex); retval = -EINVAL; - switch (origin) { + switch (whence) { case SEEK_END: offset += inode->i_size + 2; /* FIXME */ break; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5840d2aaed15..d4dfdcf76d7f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -797,7 +797,7 @@ out: /* * llseek. be sure to verify file size on SEEK_END. */ -static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) +static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int ret; @@ -805,7 +805,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&inode->i_mutex); __ceph_do_pending_vmtruncate(inode); - if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) { + if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); if (ret < 0) { offset = ret; @@ -813,7 +813,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) } } - switch (origin) { + switch (whence) { case SEEK_END: offset += inode->i_size; break; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 210f0af83fc4..ce9f3c5421bf 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -695,13 +695,13 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return written; } -static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) +static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) { /* - * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length */ - if (origin != SEEK_SET && origin != SEEK_CUR) { + if (whence != SEEK_SET && whence != SEEK_CUR) { int rc; struct inode *inode = file->f_path.dentry->d_inode; @@ -728,7 +728,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) if (rc < 0) return (loff_t)rc; } - return generic_file_llseek(file, offset, origin); + return generic_file_llseek(file, offset, whence); } static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7414ae24a79b..712b10f64c70 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1613,12 +1613,12 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir return 0; } -static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) +static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry * dentry = file->f_path.dentry; mutex_lock(&dentry->d_inode->i_mutex); - switch (origin) { + switch (whence) { case 1: offset += file->f_pos; case 0: diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index c8fff930790d..dd91264ba94f 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -296,17 +296,17 @@ static inline loff_t ext3_get_htree_eof(struct file *filp) * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) * will be invalid once the directory was converted into a dx directory */ -loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) +loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); loff_t htree_max = ext3_get_htree_eof(file); if (likely(dx_dir)) - return generic_file_llseek_size(file, offset, origin, + return generic_file_llseek_size(file, offset, whence, htree_max, htree_max); else - return generic_file_llseek(file, offset, origin); + return generic_file_llseek(file, offset, whence); } /* diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b8d877f6c1fa..80a28b297279 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -333,17 +333,17 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) * * For non-htree, ext4_llseek already chooses the proper max offset. */ -loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) +loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); loff_t htree_max = ext4_get_htree_eof(file); if (likely(dx_dir)) - return generic_file_llseek_size(file, offset, origin, + return generic_file_llseek_size(file, offset, whence, htree_max, htree_max); else - return ext4_llseek(file, offset, origin); + return ext4_llseek(file, offset, whence); } /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b64a60bf105a..d07c27ca594a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -303,7 +303,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) * page cache has data or not. */ static int ext4_find_unwritten_pgoff(struct inode *inode, - int origin, + int whence, struct ext4_map_blocks *map, loff_t *offset) { @@ -333,10 +333,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); if (nr_pages == 0) { - if (origin == SEEK_DATA) + if (whence == SEEK_DATA) break; - BUG_ON(origin != SEEK_HOLE); + BUG_ON(whence != SEEK_HOLE); /* * If this is the first time to go into the loop and * offset is not beyond the end offset, it will be a @@ -352,7 +352,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, * offset is smaller than the first page offset, it will be a * hole at this offset. */ - if (lastoff == startoff && origin == SEEK_HOLE && + if (lastoff == startoff && whence == SEEK_HOLE && lastoff < page_offset(pvec.pages[0])) { found = 1; break; @@ -366,7 +366,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, * If the current offset is not beyond the end of given * range, it will be a hole. */ - if (lastoff < endoff && origin == SEEK_HOLE && + if (lastoff < endoff && whence == SEEK_HOLE && page->index > end) { found = 1; *offset = lastoff; @@ -391,10 +391,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, do { if (buffer_uptodate(bh) || buffer_unwritten(bh)) { - if (origin == SEEK_DATA) + if (whence == SEEK_DATA) found = 1; } else { - if (origin == SEEK_HOLE) + if (whence == SEEK_HOLE) found = 1; } if (found) { @@ -416,7 +416,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, * The no. of pages is less than our desired, that would be a * hole in there. */ - if (nr_pages < num && origin == SEEK_HOLE) { + if (nr_pages < num && whence == SEEK_HOLE) { found = 1; *offset = lastoff; break; @@ -609,7 +609,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) * by calling generic_file_llseek_size() with the appropriate maxbytes * value for each. */ -loff_t ext4_llseek(struct file *file, loff_t offset, int origin) +loff_t ext4_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes; @@ -619,11 +619,11 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) else maxbytes = inode->i_sb->s_maxbytes; - switch (origin) { + switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END: - return generic_file_llseek_size(file, offset, origin, + return generic_file_llseek_size(file, offset, whence, maxbytes, i_size_read(inode)); case SEEK_DATA: return ext4_seek_data(file, offset, maxbytes); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 78d2837bc940..e21d4d8f87e3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1599,19 +1599,19 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) return err ? 0 : outarg.block; } -static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) { loff_t retval; struct inode *inode = file->f_path.dentry->d_inode; /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ - if (origin == SEEK_CUR || origin == SEEK_SET) - return generic_file_llseek(file, offset, origin); + if (whence == SEEK_CUR || whence == SEEK_SET) + return generic_file_llseek(file, offset, whence); mutex_lock(&inode->i_mutex); retval = fuse_update_attributes(inode, NULL, file, NULL); if (!retval) - retval = generic_file_llseek(file, offset, origin); + retval = generic_file_llseek(file, offset, whence); mutex_unlock(&inode->i_mutex); return retval; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index dfe2d8cb9b2c..991ab2d484dd 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -44,7 +44,7 @@ * gfs2_llseek - seek to a location in a file * @file: the file * @offset: the offset - * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) + * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) * * SEEK_END requires the glock for the file because it references the * file's size. @@ -52,26 +52,26 @@ * Returns: The new offset, or errno */ -static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) +static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_holder i_gh; loff_t error; - switch (origin) { + switch (whence) { case SEEK_END: /* These reference inode->i_size */ case SEEK_DATA: case SEEK_HOLE: error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (!error) { - error = generic_file_llseek(file, offset, origin); + error = generic_file_llseek(file, offset, whence); gfs2_glock_dq_uninit(&i_gh); } break; case SEEK_CUR: case SEEK_SET: - error = generic_file_llseek(file, offset, origin); + error = generic_file_llseek(file, offset, whence); break; default: error = -EINVAL; diff --git a/fs/libfs.c b/fs/libfs.c index 7cc37ca19cd8..35fc6e74cd88 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -81,11 +81,11 @@ int dcache_dir_close(struct inode *inode, struct file *file) return 0; } -loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) +loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; mutex_lock(&dentry->d_inode->i_mutex); - switch (origin) { + switch (whence) { case 1: offset += file->f_pos; case 0: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b9e66b7e0c14..1cc71f60b491 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -871,7 +871,7 @@ out: return res; } -static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -880,10 +880,10 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - offset, origin); + offset, whence); mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case 1: offset += filp->f_pos; case 0: diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 582bb8866131..3c2b893665ba 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -119,18 +119,18 @@ force_reval: return __nfs_revalidate_inode(server, inode); } -loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, - offset, origin); + offset, whence); /* - * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length */ - if (origin != SEEK_SET && origin != SEEK_CUR) { + if (whence != SEEK_SET && whence != SEEK_CUR) { struct inode *inode = filp->f_mapping->host; int retval = nfs_revalidate_file_size(inode, filp); @@ -138,7 +138,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return (loff_t)retval; } - return generic_file_llseek(filp, offset, origin); + return generic_file_llseek(filp, offset, whence); } EXPORT_SYMBOL_GPL(nfs_file_llseek); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 70b5863a2d64..f487aa343442 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -832,7 +832,7 @@ out: return ret; } -int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) { struct inode *inode = file->f_mapping->host; int ret; @@ -843,7 +843,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) struct buffer_head *di_bh = NULL; struct ocfs2_extent_rec rec; - BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE); + BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE); ret = ocfs2_inode_lock(inode, &di_bh, 0); if (ret) { @@ -859,7 +859,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) } if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - if (origin == SEEK_HOLE) + if (whence == SEEK_HOLE) *offset = inode->i_size; goto out_unlock; } @@ -888,8 +888,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; } - if ((!is_data && origin == SEEK_HOLE) || - (is_data && origin == SEEK_DATA)) { + if ((!is_data && whence == SEEK_HOLE) || + (is_data && whence == SEEK_DATA)) { if (extoff > *offset) *offset = extoff; goto out_unlock; @@ -899,7 +899,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) cpos += clen; } - if (origin == SEEK_HOLE) { + if (whence == SEEK_HOLE) { extoff = cpos; extoff <<= cs_bits; extlen = clen; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index dda089804942..fe492e1a3cfc 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2637,14 +2637,14 @@ bail: } /* Refer generic_file_llseek_unlocked() */ -static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int ret = 0; mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case SEEK_SET: break; case SEEK_END: @@ -2659,7 +2659,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) break; case SEEK_DATA: case SEEK_HOLE: - ret = ocfs2_seek_data_hole_offset(file, &offset, origin); + ret = ocfs2_seek_data_hole_offset(file, &offset, whence); if (ret) goto out; break; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ed1d8c7212da..67de74ca85f4 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -151,13 +151,13 @@ static int pstore_file_open(struct inode *inode, struct file *file) return 0; } -static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin) +static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence) { struct seq_file *sf = file->private_data; if (sf->op) - return seq_lseek(file, off, origin); - return default_llseek(file, off, origin); + return seq_lseek(file, off, whence); + return default_llseek(file, off, whence); } static const struct file_operations pstore_file_operations = { diff --git a/fs/read_write.c b/fs/read_write.c index d06534857e9e..1edaf099ddd7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -54,7 +54,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, * generic_file_llseek_size - generic llseek implementation for regular files * @file: file structure to seek on * @offset: file offset to seek to - * @origin: type of seek + * @whence: type of seek * @size: max size of this file in file system * @eof: offset used for SEEK_END position * @@ -67,12 +67,12 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, * read/writes behave like SEEK_SET against seeks. */ loff_t -generic_file_llseek_size(struct file *file, loff_t offset, int origin, +generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof) { struct inode *inode = file->f_mapping->host; - switch (origin) { + switch (whence) { case SEEK_END: offset += eof; break; @@ -122,17 +122,17 @@ EXPORT_SYMBOL(generic_file_llseek_size); * generic_file_llseek - generic llseek implementation for regular files * @file: file structure to seek on * @offset: file offset to seek to - * @origin: type of seek + * @whence: type of seek * * This is a generic implemenation of ->llseek useable for all normal local * filesystems. It just updates the file offset to the value specified by - * @offset and @origin under i_mutex. + * @offset and @whence under i_mutex. */ -loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) +loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - return generic_file_llseek_size(file, offset, origin, + return generic_file_llseek_size(file, offset, whence, inode->i_sb->s_maxbytes, i_size_read(inode)); } @@ -142,32 +142,32 @@ EXPORT_SYMBOL(generic_file_llseek); * noop_llseek - No Operation Performed llseek implementation * @file: file structure to seek on * @offset: file offset to seek to - * @origin: type of seek + * @whence: type of seek * * This is an implementation of ->llseek useable for the rare special case when * userspace expects the seek to succeed but the (device) file is actually not * able to perform the seek. In this case you use noop_llseek() instead of * falling back to the default implementation of ->llseek. */ -loff_t noop_llseek(struct file *file, loff_t offset, int origin) +loff_t noop_llseek(struct file *file, loff_t offset, int whence) { return file->f_pos; } EXPORT_SYMBOL(noop_llseek); -loff_t no_llseek(struct file *file, loff_t offset, int origin) +loff_t no_llseek(struct file *file, loff_t offset, int whence) { return -ESPIPE; } EXPORT_SYMBOL(no_llseek); -loff_t default_llseek(struct file *file, loff_t offset, int origin) +loff_t default_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_path.dentry->d_inode; loff_t retval; mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case SEEK_END: offset += i_size_read(inode); break; @@ -216,7 +216,7 @@ out: } EXPORT_SYMBOL(default_llseek); -loff_t vfs_llseek(struct file *file, loff_t offset, int origin) +loff_t vfs_llseek(struct file *file, loff_t offset, int whence) { loff_t (*fn)(struct file *, loff_t, int); @@ -225,11 +225,11 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) if (file->f_op && file->f_op->llseek) fn = file->f_op->llseek; } - return fn(file, offset, origin); + return fn(file, offset, whence); } EXPORT_SYMBOL(vfs_llseek); -SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) +SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; struct fd f = fdget(fd); @@ -237,8 +237,8 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) return -EBADF; retval = -EINVAL; - if (origin <= SEEK_MAX) { - loff_t res = vfs_llseek(f.file, offset, origin); + if (whence <= SEEK_MAX) { + loff_t res = vfs_llseek(f.file, offset, whence); retval = res; if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ @@ -250,7 +250,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) #ifdef __ARCH_WANT_SYS_LLSEEK SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned long, offset_low, loff_t __user *, result, - unsigned int, origin) + unsigned int, whence) { int retval; struct fd f = fdget(fd); @@ -260,11 +260,11 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, return -EBADF; retval = -EINVAL; - if (origin > SEEK_MAX) + if (whence > SEEK_MAX) goto out_putf; offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, - origin); + whence); retval = (int)offset; if (offset >= 0) { diff --git a/fs/seq_file.c b/fs/seq_file.c index 99dffab4c4e4..9d863fb501f9 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -300,14 +300,14 @@ EXPORT_SYMBOL(seq_read); * * Ready-made ->f_op->llseek() */ -loff_t seq_lseek(struct file *file, loff_t offset, int origin) +loff_t seq_lseek(struct file *file, loff_t offset, int whence) { struct seq_file *m = file->private_data; loff_t retval = -EINVAL; mutex_lock(&m->lock); m->version = file->f_version; - switch (origin) { + switch (whence) { case 1: offset += file->f_pos; case 0: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index e271fba1651b..8a574776a493 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -453,11 +453,11 @@ out: } /* If a directory is seeked, we have to free saved readdir() state */ -static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) { kfree(file->private_data); file->private_data = NULL; - return generic_file_llseek(file, offset, origin); + return generic_file_llseek(file, offset, whence); } /* Free saved readdir() state when the directory is closed */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 408fb1e77a0a..029552ff774c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2286,9 +2286,9 @@ extern ino_t find_inode_number(struct dentry *, struct qstr *); #include /* needed for stackable file system support */ -extern loff_t default_llseek(struct file *file, loff_t offset, int origin); +extern loff_t default_llseek(struct file *file, loff_t offset, int whence); -extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); +extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); @@ -2396,11 +2396,11 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); -extern loff_t no_llseek(struct file *file, loff_t offset, int origin); -extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); +extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); +extern loff_t no_llseek(struct file *file, loff_t offset, int whence); +extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, - int origin, loff_t maxsize, loff_t eof); + int whence, loff_t maxsize, loff_t eof); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a52f2f4fe030..92691d85c320 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -394,7 +394,7 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); -loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin); +loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence); int ftrace_regex_release(struct inode *inode, struct file *file); void __init @@ -559,7 +559,7 @@ static inline ssize_t ftrace_filter_write(struct file *file, const char __user * size_t cnt, loff_t *ppos) { return -ENODEV; } static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { return -ENODEV; } -static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin) +static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence) { return -ENODEV; } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91835e7f364d..36c3b07c5119 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -560,10 +560,10 @@ asmlinkage long sys_utime(char __user *filename, asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes); asmlinkage long sys_lseek(unsigned int fd, off_t offset, - unsigned int origin); + unsigned int whence); asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, - unsigned int origin); + unsigned int whence); asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); asmlinkage long sys_readv(unsigned long fd, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index afd092de45b7..3ffe4c5ad3f3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2675,12 +2675,12 @@ ftrace_notrace_open(struct inode *inode, struct file *file) } loff_t -ftrace_regex_lseek(struct file *file, loff_t offset, int origin) +ftrace_regex_lseek(struct file *file, loff_t offset, int whence) { loff_t ret; if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, origin); + ret = seq_lseek(file, offset, whence); else file->f_pos = ret = 1; diff --git a/mm/shmem.c b/mm/shmem.c index 03f9ba8fb8e5..5c90d84c2b02 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1719,7 +1719,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. */ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, - pgoff_t index, pgoff_t end, int origin) + pgoff_t index, pgoff_t end, int whence) { struct page *page; struct pagevec pvec; @@ -1733,13 +1733,13 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, pvec.nr = shmem_find_get_pages_and_swap(mapping, index, pvec.nr, pvec.pages, indices); if (!pvec.nr) { - if (origin == SEEK_DATA) + if (whence == SEEK_DATA) index = end; break; } for (i = 0; i < pvec.nr; i++, index++) { if (index < indices[i]) { - if (origin == SEEK_HOLE) { + if (whence == SEEK_HOLE) { done = true; break; } @@ -1751,8 +1751,8 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, page = NULL; } if (index >= end || - (page && origin == SEEK_DATA) || - (!page && origin == SEEK_HOLE)) { + (page && whence == SEEK_DATA) || + (!page && whence == SEEK_HOLE)) { done = true; break; } @@ -1765,15 +1765,15 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, return index; } -static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin) +static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; pgoff_t start, end; loff_t new_offset; - if (origin != SEEK_DATA && origin != SEEK_HOLE) - return generic_file_llseek_size(file, offset, origin, + if (whence != SEEK_DATA && whence != SEEK_HOLE) + return generic_file_llseek_size(file, offset, whence, MAX_LFS_FILESIZE, i_size_read(inode)); mutex_lock(&inode->i_mutex); /* We're holding i_mutex so we can access i_size directly */ @@ -1785,12 +1785,12 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin) else { start = offset >> PAGE_CACHE_SHIFT; end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - new_offset = shmem_seek_hole_data(mapping, start, end, origin); + new_offset = shmem_seek_hole_data(mapping, start, end, whence); new_offset <<= PAGE_CACHE_SHIFT; if (new_offset > offset) { if (new_offset < inode->i_size) offset = new_offset; - else if (origin == SEEK_DATA) + else if (whence == SEEK_DATA) offset = -ENXIO; else offset = inode->i_size; -- cgit v1.2.3 From 8cc9764c9c7d01a6e2c3ddac8f0ac7716be01868 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Mon, 17 Dec 2012 16:00:43 -0800 Subject: drivers/video/backlight/lp855x_bl.c: use generic PWM functions The LP855x family devices support the PWM input for the backlight control. Period of the PWM is configurable in the platform side. Platform specific functions are unnecessary anymore because generic PWM functions are used inside the driver. (PWM input mode) To set the brightness, new lp855x_pwm_ctrl() is used. If a PWM device is not allocated, devm_pwm_get() is called. The PWM consumer name is from the chip name such as 'lp8550' and 'lp8556'. To get the brightness value, no additional handling is required. Just the value of 'props.brightness' is returned. If the PWM driver is not ready while initializing the LP855x driver, it's OK. The PWM device can be retrieved later, when the brightness value is changed. Documentation is updated with an example. [akpm@linux-foundation.org: coding-style simplification, per Thierry] Signed-off-by: Milo(Woogyom) Kim Cc: Thierry Reding Cc: Richard Purdie Cc: Bryan Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/backlight/lp855x-driver.txt | 10 ++------- drivers/video/backlight/lp855x_bl.c | 37 ++++++++++++++++++++++--------- include/linux/platform_data/lp855x.h | 9 ++------ 3 files changed, 30 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt index f5e4caafab7d..1529394cfe8b 100644 --- a/Documentation/backlight/lp855x-driver.txt +++ b/Documentation/backlight/lp855x-driver.txt @@ -35,11 +35,8 @@ For supporting platform specific data, the lp855x platform data can be used. * mode : Brightness control mode. PWM or register based. * device_control : Value of DEVICE CONTROL register. * initial_brightness : Initial value of backlight brightness. -* pwm_data : Platform specific pwm generation functions. +* period_ns : Platform specific PWM period value. unit is nano. Only valid when brightness is pwm input mode. - Functions should be implemented by PWM driver. - - pwm_set_intensity() : set duty of PWM - - pwm_get_intensity() : get current duty of PWM * load_new_rom_data : 0 : use default configuration data 1 : update values of eeprom or eprom registers on loading driver @@ -71,8 +68,5 @@ static struct lp855x_platform_data lp8556_pdata = { .mode = PWM_BASED, .device_control = PWM_CONFIG(LP8556), .initial_brightness = INITIAL_BRT, - .pwm_data = { - .pwm_set_intensity = platform_pwm_set_intensity, - .pwm_get_intensity = platform_pwm_get_intensity, - }, + .period_ns = 1000000, }; diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index fd985e0681e9..b437541555ff 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Registers */ #define BRIGHTNESS_CTRL 0x00 @@ -36,6 +37,7 @@ struct lp855x { struct device *dev; struct mutex xfer_lock; struct lp855x_platform_data *pdata; + struct pwm_device *pwm; }; static int lp855x_read_byte(struct lp855x *lp, u8 reg, u8 *data) @@ -121,6 +123,28 @@ static int lp855x_init_registers(struct lp855x *lp) return ret; } +static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br) +{ + unsigned int period = lp->pdata->period_ns; + unsigned int duty = br * period / max_br; + struct pwm_device *pwm; + + /* request pwm device with the consumer name */ + if (!lp->pwm) { + pwm = devm_pwm_get(lp->dev, lp->chipname); + if (IS_ERR(pwm)) + return; + + lp->pwm = pwm; + } + + pwm_config(lp->pwm, duty, period); + if (duty) + pwm_enable(lp->pwm); + else + pwm_disable(lp->pwm); +} + static int lp855x_bl_update_status(struct backlight_device *bl) { struct lp855x *lp = bl_get_data(bl); @@ -130,12 +154,10 @@ static int lp855x_bl_update_status(struct backlight_device *bl) bl->props.brightness = 0; if (mode == PWM_BASED) { - struct lp855x_pwm_data *pd = &lp->pdata->pwm_data; int br = bl->props.brightness; int max_br = bl->props.max_brightness; - if (pd->pwm_set_intensity) - pd->pwm_set_intensity(br, max_br); + lp855x_pwm_ctrl(lp, br, max_br); } else if (mode == REGISTER_BASED) { u8 val = bl->props.brightness; @@ -150,14 +172,7 @@ static int lp855x_bl_get_brightness(struct backlight_device *bl) struct lp855x *lp = bl_get_data(bl); enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode; - if (mode == PWM_BASED) { - struct lp855x_pwm_data *pd = &lp->pdata->pwm_data; - int max_br = bl->props.max_brightness; - - if (pd->pwm_get_intensity) - bl->props.brightness = pd->pwm_get_intensity(max_br); - - } else if (mode == REGISTER_BASED) { + if (mode == REGISTER_BASED) { u8 val = 0; lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val); diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h index 761f31752367..e81f62d24ee2 100644 --- a/include/linux/platform_data/lp855x.h +++ b/include/linux/platform_data/lp855x.h @@ -89,11 +89,6 @@ enum lp8556_brightness_source { LP8556_COMBINED2, /* pwm + i2c after the shaper block */ }; -struct lp855x_pwm_data { - void (*pwm_set_intensity) (int brightness, int max_brightness); - int (*pwm_get_intensity) (int max_brightness); -}; - struct lp855x_rom_data { u8 addr; u8 val; @@ -105,7 +100,7 @@ struct lp855x_rom_data { * @mode : brightness control by pwm or lp855x register * @device_control : value of DEVICE CONTROL register * @initial_brightness : initial value of backlight brightness - * @pwm_data : platform specific pwm generation functions. + * @period_ns : platform specific pwm period value. unit is nano. Only valid when mode is PWM_BASED. * @load_new_rom_data : 0 : use default configuration data @@ -118,7 +113,7 @@ struct lp855x_platform_data { enum lp855x_brightness_ctrl_mode mode; u8 device_control; int initial_brightness; - struct lp855x_pwm_data pwm_data; + unsigned int period_ns; u8 load_new_rom_data; int size_program; struct lp855x_rom_data *rom_data; -- cgit v1.2.3 From 762a936fba7bd9225ca9a96e4860f6969b6b5670 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 17 Dec 2012 16:01:06 -0800 Subject: backlight: add of_find_backlight_by_node() This function finds the struct backlight_device for a given device tree node. A dummy function is provided so that it safely compiles out if OF support is disabled. [akpm@linux-foundation.org: Don't use IS_ENABLED(CONFIG_OF)] Signed-off-by: Thierry Reding Acked-by: Jingoo Han Reviewed-by: Grant Likely Cc: Thierry Reding Reviewed-by: Grant Likely Acked-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/backlight.c | 29 +++++++++++++++++++++++++++++ include/linux/backlight.h | 10 ++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 297db2fa91f5..345f6660d4b3 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -370,6 +370,35 @@ void backlight_device_unregister(struct backlight_device *bd) } EXPORT_SYMBOL(backlight_device_unregister); +#ifdef CONFIG_OF +static int of_parent_match(struct device *dev, void *data) +{ + return dev->parent && dev->parent->of_node == data; +} + +/** + * of_find_backlight_by_node() - find backlight device by device-tree node + * @node: device-tree node of the backlight device + * + * Returns a pointer to the backlight device corresponding to the given DT + * node or NULL if no such backlight device exists or if the device hasn't + * been probed yet. + * + * This function obtains a reference on the backlight device and it is the + * caller's responsibility to drop the reference by calling put_device() on + * the backlight device's .dev field. + */ +struct backlight_device *of_find_backlight_by_node(struct device_node *node) +{ + struct device *dev; + + dev = class_find_device(backlight_class, NULL, node, of_parent_match); + + return dev ? to_backlight_device(dev) : NULL; +} +EXPORT_SYMBOL(of_find_backlight_by_node); +#endif + static void __exit backlight_class_exit(void) { class_destroy(backlight_class); diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 5ffc6dda4675..da9a0825e007 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -134,4 +134,14 @@ struct generic_bl_info { void (*kick_battery)(void); }; +#ifdef CONFIG_OF +struct backlight_device *of_find_backlight_by_node(struct device_node *node); +#else +static inline struct backlight_device * +of_find_backlight_by_node(struct device_node *node) +{ + return NULL; +} +#endif + #endif -- cgit v1.2.3 From b18888ab256f05626193be955a7a03f01d676f8c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Dec 2012 16:01:18 -0800 Subject: string: introduce helper to get base file name from given path There are several places in the kernel that use functionality like basename(3) with the exception: in case of '/foo/bar/' we expect to get an empty string. Let's do it common helper for them. Signed-off-by: Andy Shevchenko Cc: Jason Baron Cc: YAMANE Toshiaki Cc: Greg Kroah-Hartman Cc: Steven Rostedt Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 630125818ca8..ac889c5ea11b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -143,4 +143,15 @@ static inline bool strstarts(const char *str, const char *prefix) extern size_t memweight(const void *ptr, size_t bytes); +/** + * kbasename - return the last part of a pathname. + * + * @path: path to extract the filename from. + */ +static inline const char *kbasename(const char *path) +{ + const char *tail = strrchr(path, '/'); + return tail ? tail + 1 : path; +} + #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3 From a1fd3e24d8a484b3265a6d485202afe093c058f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:32 -0800 Subject: percpu_rw_semaphore: reimplement to not block the readers unnecessarily Currently the writer does msleep() plus synchronize_sched() 3 times to acquire/release the semaphore, and during this time the readers are blocked completely. Even if the "write" section was not actually started or if it was already finished. With this patch down_write/up_write does synchronize_sched() twice and down_read/up_read are still possible during this time, just they use the slow path. percpu_down_write() first forces the readers to use rw_semaphore and increment the "slow" counter to take the lock for reading, then it takes that rw_semaphore for writing and blocks the readers. Also. With this patch the code relies on the documented behaviour of synchronize_sched(), it doesn't try to pair synchronize_sched() with barrier. Signed-off-by: Oleg Nesterov Reviewed-by: Paul E. McKenney Cc: Linus Torvalds Cc: Mikulas Patocka Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 83 ++++------------------- lib/Makefile | 2 +- lib/percpu-rwsem.c | 154 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 71 deletions(-) create mode 100644 lib/percpu-rwsem.c (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index bd1e86071e57..592f0d610d8e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -2,82 +2,25 @@ #define _LINUX_PERCPU_RWSEM_H #include +#include #include -#include -#include +#include struct percpu_rw_semaphore { - unsigned __percpu *counters; - bool locked; - struct mutex mtx; + unsigned int __percpu *fast_read_ctr; + struct mutex writer_mutex; + struct rw_semaphore rw_sem; + atomic_t slow_read_ctr; + wait_queue_head_t write_waitq; }; -#define light_mb() barrier() -#define heavy_mb() synchronize_sched_expedited() +extern void percpu_down_read(struct percpu_rw_semaphore *); +extern void percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read(struct percpu_rw_semaphore *p) -{ - rcu_read_lock_sched(); - if (unlikely(p->locked)) { - rcu_read_unlock_sched(); - mutex_lock(&p->mtx); - this_cpu_inc(*p->counters); - mutex_unlock(&p->mtx); - return; - } - this_cpu_inc(*p->counters); - rcu_read_unlock_sched(); - light_mb(); /* A, between read of p->locked and read of data, paired with D */ -} +extern void percpu_down_write(struct percpu_rw_semaphore *); +extern void percpu_up_write(struct percpu_rw_semaphore *); -static inline void percpu_up_read(struct percpu_rw_semaphore *p) -{ - light_mb(); /* B, between read of the data and write to p->counter, paired with C */ - this_cpu_dec(*p->counters); -} - -static inline unsigned __percpu_count(unsigned __percpu *counters) -{ - unsigned total = 0; - int cpu; - - for_each_possible_cpu(cpu) - total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu)); - - return total; -} - -static inline void percpu_down_write(struct percpu_rw_semaphore *p) -{ - mutex_lock(&p->mtx); - p->locked = true; - synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */ - while (__percpu_count(p->counters)) - msleep(1); - heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ -} - -static inline void percpu_up_write(struct percpu_rw_semaphore *p) -{ - heavy_mb(); /* D, between write to data and write to p->locked, paired with A */ - p->locked = false; - mutex_unlock(&p->mtx); -} - -static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p) -{ - p->counters = alloc_percpu(unsigned); - if (unlikely(!p->counters)) - return -ENOMEM; - p->locked = false; - mutex_init(&p->mtx); - return 0; -} - -static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p) -{ - free_percpu(p->counters); - p->counters = NULL; /* catch use after free bugs */ -} +extern int percpu_init_rwsem(struct percpu_rw_semaphore *); +extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #endif diff --git a/lib/Makefile b/lib/Makefile index e2152fa7ff4d..e959c20efb24 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ - idr.o int_sqrt.o extable.o \ + idr.o int_sqrt.o extable.o percpu-rwsem.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c new file mode 100644 index 000000000000..2e03bcfe48f9 --- /dev/null +++ b/lib/percpu-rwsem.c @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +{ + brw->fast_read_ctr = alloc_percpu(int); + if (unlikely(!brw->fast_read_ctr)) + return -ENOMEM; + + mutex_init(&brw->writer_mutex); + init_rwsem(&brw->rw_sem); + atomic_set(&brw->slow_read_ctr, 0); + init_waitqueue_head(&brw->write_waitq); + return 0; +} + +void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +{ + free_percpu(brw->fast_read_ctr); + brw->fast_read_ctr = NULL; /* catch use after free bugs */ +} + +/* + * This is the fast-path for down_read/up_read, it only needs to ensure + * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * fast per-cpu counter. The writer uses synchronize_sched_expedited() to + * serialize with the preempt-disabled section below. + * + * The nontrivial part is that we should guarantee acquire/release semantics + * in case when + * + * R_W: down_write() comes after up_read(), the writer should see all + * changes done by the reader + * or + * W_R: down_read() comes after up_write(), the reader should see all + * changes done by the writer + * + * If this helper fails the callers rely on the normal rw_semaphore and + * atomic_dec_and_test(), so in this case we have the necessary barriers. + * + * But if it succeeds we do not have any barriers, mutex_is_locked() or + * __this_cpu_add() below can be reordered with any LOAD/STORE done by the + * reader inside the critical section. See the comments in down_write and + * up_write below. + */ +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +{ + bool success = false; + + preempt_disable(); + if (likely(!mutex_is_locked(&brw->writer_mutex))) { + __this_cpu_add(*brw->fast_read_ctr, val); + success = true; + } + preempt_enable(); + + return success; +} + +/* + * Like the normal down_read() this is not recursive, the writer can + * come after the first percpu_down_read() and create the deadlock. + */ +void percpu_down_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, +1))) + return; + + down_read(&brw->rw_sem); + atomic_inc(&brw->slow_read_ctr); + up_read(&brw->rw_sem); +} + +void percpu_up_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, -1))) + return; + + /* false-positive is possible but harmless */ + if (atomic_dec_and_test(&brw->slow_read_ctr)) + wake_up_all(&brw->write_waitq); +} + +static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +{ + unsigned int sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + sum += per_cpu(*brw->fast_read_ctr, cpu); + per_cpu(*brw->fast_read_ctr, cpu) = 0; + } + + return sum; +} + +/* + * A writer takes ->writer_mutex to exclude other writers and to force the + * readers to switch to the slow mode, note the mutex_is_locked() check in + * update_fast_ctr(). + * + * After that the readers can only inc/dec the slow ->slow_read_ctr counter, + * ->fast_read_ctr is stable. Once the writer moves its sum into the slow + * counter it represents the number of active readers. + * + * Finally the writer takes ->rw_sem for writing and blocks the new readers, + * then waits until the slow counter becomes zero. + */ +void percpu_down_write(struct percpu_rw_semaphore *brw) +{ + /* also blocks update_fast_ctr() which checks mutex_is_locked() */ + mutex_lock(&brw->writer_mutex); + + /* + * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * so that update_fast_ctr() can't succeed. + * + * 2. Ensures we see the result of every previous this_cpu_add() in + * update_fast_ctr(). + * + * 3. Ensures that if any reader has exited its critical section via + * fast-path, it executes a full memory barrier before we return. + * See R_W case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ + atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + + /* block the new readers completely */ + down_write(&brw->rw_sem); + + /* wait for all readers to complete their percpu_up_read() */ + wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); +} + +void percpu_up_write(struct percpu_rw_semaphore *brw) +{ + /* allow the new readers, but only the slow-path */ + up_write(&brw->rw_sem); + + /* + * Insert the barrier before the next fast-path in down_read, + * see W_R case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + mutex_unlock(&brw->writer_mutex); +} -- cgit v1.2.3 From 9390ef0c85fd065f01045fef708b046c98cda04c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:36 -0800 Subject: percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr percpu_rw_semaphore->writer_mutex was only added to simplify the initial rewrite, the only thing it protects is clear_fast_ctr() which otherwise could be called by multiple writers. ->rw_sem is enough to serialize the writers. Kill this mutex and add "atomic_t write_ctr" instead. The writers increment/decrement this counter, the readers check it is zero instead of mutex_is_locked(). Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to avoid the race with other writers. This is a bit sub-optimal, only the first writer needs this and we do not need to exclude the readers at this stage. But this is simple, we do not want another internal lock until we add more features. And this speeds up the write-contended case. Before this patch the racing writers sleep in synchronize_sched_expedited() sequentially, with this patch multiple synchronize_sched_expedited's can "overlap" with each other. Note: we can do more optimizations, this is only the first step. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 4 ++-- lib/percpu-rwsem.c | 34 ++++++++++++++++------------------ 2 files changed, 18 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 592f0d610d8e..d2146a4f833e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -1,14 +1,14 @@ #ifndef _LINUX_PERCPU_RWSEM_H #define _LINUX_PERCPU_RWSEM_H -#include +#include #include #include #include struct percpu_rw_semaphore { unsigned int __percpu *fast_read_ctr; - struct mutex writer_mutex; + atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index 2e03bcfe48f9..ce92ab563a08 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw) if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - mutex_init(&brw->writer_mutex); init_rwsem(&brw->rw_sem); + atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; @@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) /* * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the * fast per-cpu counter. The writer uses synchronize_sched_expedited() to * serialize with the preempt-disabled section below. * @@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) * If this helper fails the callers rely on the normal rw_semaphore and * atomic_dec_and_test(), so in this case we have the necessary barriers. * - * But if it succeeds we do not have any barriers, mutex_is_locked() or + * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or * __this_cpu_add() below can be reordered with any LOAD/STORE done by the * reader inside the critical section. See the comments in down_write and * up_write below. @@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) bool success = false; preempt_disable(); - if (likely(!mutex_is_locked(&brw->writer_mutex))) { + if (likely(!atomic_read(&brw->write_ctr))) { __this_cpu_add(*brw->fast_read_ctr, val); success = true; } @@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) } /* - * A writer takes ->writer_mutex to exclude other writers and to force the - * readers to switch to the slow mode, note the mutex_is_locked() check in - * update_fast_ctr(). + * A writer increments ->write_ctr to force the readers to switch to the + * slow mode, note the atomic_read() check in update_fast_ctr(). * * After that the readers can only inc/dec the slow ->slow_read_ctr counter, * ->fast_read_ctr is stable. Once the writer moves its sum into the slow @@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* also blocks update_fast_ctr() which checks mutex_is_locked() */ - mutex_lock(&brw->writer_mutex); - + /* tell update_fast_ctr() there is a pending writer */ + atomic_inc(&brw->write_ctr); /* - * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read * so that update_fast_ctr() can't succeed. * * 2. Ensures we see the result of every previous this_cpu_add() in @@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) */ synchronize_sched_expedited(); + /* exclude other writers, and block the new readers completely */ + down_write(&brw->rw_sem); + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); - /* block the new readers completely */ - down_write(&brw->rw_sem); - /* wait for all readers to complete their percpu_up_read() */ wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); } void percpu_up_write(struct percpu_rw_semaphore *brw) { - /* allow the new readers, but only the slow-path */ + /* release the lock, but the readers can't use the fast-path */ up_write(&brw->rw_sem); - /* * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ synchronize_sched_expedited(); - mutex_unlock(&brw->writer_mutex); + /* the last writer unblocks update_fast_ctr() */ + atomic_dec(&brw->write_ctr); } -- cgit v1.2.3 From 8ebe34731a0d1a9d89b536430afd98d0fadec99b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:38 -0800 Subject: percpu_rw_semaphore: add lockdep annotations Add lockdep annotations. Not only this can help to find the potential problems, we do not want the false warnings if, say, the task takes two different percpu_rw_semaphore's for reading. IOW, at least ->rw_sem should not use a single class. This patch exposes this internal lock to lockdep so that it represents the whole percpu_rw_semaphore. This way we do not need to add another "fake" ->lockdep_map and lock_class_key. More importantly, this also makes the output from lockdep much more understandable if it finds the problem. In short, with this patch from lockdep pov percpu_down_read() and percpu_up_read() acquire/release ->rw_sem for reading, this matches the actual semantics. This abuses __up_read() but I hope this is fine and in fact I'd like to have down_read_no_lockdep() as well, percpu_down_read_recursive_readers() will need it. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 10 +++++++++- lib/percpu-rwsem.c | 21 +++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index d2146a4f833e..3e88c9a7d57f 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,6 +5,7 @@ #include #include #include +#include struct percpu_rw_semaphore { unsigned int __percpu *fast_read_ctr; @@ -20,7 +21,14 @@ extern void percpu_up_read(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); -extern int percpu_init_rwsem(struct percpu_rw_semaphore *); +extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, + const char *, struct lock_class_key *); extern void percpu_free_rwsem(struct percpu_rw_semaphore *); +#define percpu_init_rwsem(brw) \ +({ \ + static struct lock_class_key rwsem_key; \ + __percpu_init_rwsem(brw, #brw, &rwsem_key); \ +}) + #endif diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index ce92ab563a08..652a8ee8efe9 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -2,18 +2,21 @@ #include #include #include +#include #include #include #include #include -int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, + const char *name, struct lock_class_key *rwsem_key) { brw->fast_read_ctr = alloc_percpu(int); if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - init_rwsem(&brw->rw_sem); + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + __init_rwsem(&brw->rw_sem, name, rwsem_key); atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); @@ -66,19 +69,29 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) /* * Like the normal down_read() this is not recursive, the writer can * come after the first percpu_down_read() and create the deadlock. + * + * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, + * percpu_up_read() does rwsem_release(). This pairs with the usage + * of ->rw_sem in percpu_down/up_write(). */ void percpu_down_read(struct percpu_rw_semaphore *brw) { - if (likely(update_fast_ctr(brw, +1))) + might_sleep(); + if (likely(update_fast_ctr(brw, +1))) { + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); return; + } down_read(&brw->rw_sem); atomic_inc(&brw->slow_read_ctr); - up_read(&brw->rw_sem); + /* avoid up_read()->rwsem_release() */ + __up_read(&brw->rw_sem); } void percpu_up_read(struct percpu_rw_semaphore *brw) { + rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); + if (likely(update_fast_ctr(brw, -1))) return; -- cgit v1.2.3 From 0ad50c3896afbb3c103409a18260e601b87a744c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 17 Dec 2012 16:01:45 -0800 Subject: compat: generic compat_sys_sched_rr_get_interval() implementation This function is used by sparc, powerpc tile and arm64 for compat support. The patch adds a generic implementation with a wrapper for PowerPC to do the u32->int sign extension. The reason for a single patch covering powerpc, tile, sparc and arm64 is to keep it bisectable, otherwise kernel building may fail with mismatched function declarations. Signed-off-by: Catalin Marinas Acked-by: Chris Metcalf [for tile] Acked-by: David S. Miller Acked-by: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/unistd.h | 1 + arch/arm64/kernel/sys_compat.c | 15 --------------- arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/sys_ppc32.c | 17 ++++------------- arch/sparc/include/asm/unistd.h | 1 + arch/sparc/kernel/sys_sparc32.c | 14 -------------- arch/tile/include/asm/compat.h | 2 -- arch/tile/include/asm/unistd.h | 1 + arch/tile/kernel/compat.c | 18 ------------------ include/linux/compat.h | 3 +++ kernel/compat.c | 17 +++++++++++++++++ 12 files changed, 29 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index d69aeea6da1e..76fb7dd3350a 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -20,6 +20,7 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f7b05edf8ce3..26e9c4eeaba8 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -28,21 +28,6 @@ #include #include -asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs(old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - static inline void do_compat_cache_op(unsigned long start, unsigned long end, int flags) { diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 840838769853..cec8aae5cbf8 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -164,7 +164,7 @@ COMPAT_SYS_SPU(sched_getscheduler) SYSCALL_SPU(sched_yield) COMPAT_SYS_SPU(sched_get_priority_max) COMPAT_SYS_SPU(sched_get_priority_min) -COMPAT_SYS_SPU(sched_rr_get_interval) +SYSX_SPU(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval_wrapper,sys_sched_rr_get_interval) COMPAT_SYS_SPU(nanosleep) SYSCALL_SPU(mremap) SYSCALL_SPU(setresuid) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 76fe846ec40e..bcbbe413c606 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -54,6 +54,7 @@ #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_NEWFSTATAT #define __ARCH_WANT_COMPAT_SYS_SENDFILE +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 9c2ed90ece8f..8a93778ed9f5 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -175,19 +175,10 @@ asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 a * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs (); - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs (KERNEL_DS); - ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t); - set_fs (old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; +asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid, + struct compat_timespec __user *interval) +{ + return compat_sys_sched_rr_get_interval((int)pid, interval); } /* Note: it is necessary to treat mode as an unsigned int, diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index c3e5d8b64171..497386a7ed28 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -45,6 +45,7 @@ #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_COMPAT_SYS_SENDFILE +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_EXECVE diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 03c7e929ec34..4a4cdc633f6b 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -211,20 +211,6 @@ asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) return sys_sysfs(option, arg1, arg2); } -asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t); - set_fs (old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_sigset_t __user *oset, diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index ca61fb4296b3..88f3c227afd9 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -296,8 +296,6 @@ long compat_sys_sync_file_range2(int fd, unsigned int flags, long compat_sys_fallocate(int fd, int mode, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi); -long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); /* Assembly trampoline to avoid clobbering r0. */ long _compat_sys_rt_sigreturn(void); diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index b51c6ee3cd6c..fe841e7d4963 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -14,6 +14,7 @@ /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */ #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_NEWFSTATAT #define __ARCH_WANT_SYS_EXECVE diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 9cd7cb6041c0..7f72401b4f45 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -76,24 +76,6 @@ long compat_sys_fallocate(int fd, int mode, ((loff_t)len_hi << 32) | len_lo); } - - -long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, - (struct timespec __force __user *)&t); - set_fs(old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/include/linux/compat.h b/include/linux/compat.h index 784ebfe63c48..e4920bd58a47 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -588,6 +588,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); +asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval); + #else #define is_compat_task() (0) diff --git a/kernel/compat.c b/kernel/compat.c index c28a306ae05c..f6150e92dfc9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1215,6 +1215,23 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } +#ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL +asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} +#endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */ + /* * Allocate user-space memory for the duration of a single system call, * in order to marshall parameters inside a compat thunk. -- cgit v1.2.3 From 4c925d6031f719fad6ea8b1c94a636f4c0fea39b Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Mon, 17 Dec 2012 16:03:04 -0800 Subject: kstrto*: add documentation As Bruce Fields pointed out, kstrto* is currently lacking kerneldoc comments. This patch adds kerneldoc comments to common variants of kstrto*: kstrto(u)l, kstrto(u)ll and kstrto(u)int. Signed-off-by: Eldad Zack Cc: J. Bruce Fields Cc: Joe Perches Cc: Randy Dunlap Cc: Alexey Dobriyan Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DocBook/kernel-api.tmpl | 3 ++ include/linux/kernel.h | 33 ++++++++++++++++++ lib/kstrtox.c | 64 +++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 00687ee9d363..f75ab4c1b281 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -58,6 +58,9 @@ String Conversions !Elib/vsprintf.c +!Finclude/linux/kernel.h kstrtol +!Finclude/linux/kernel.h kstrtoul +!Elib/kstrtox.c String Manipulation %s\n", __func__); + if (rtsx_pci_card_exclusive_check(host->pcr, RTSX_MS_CARD)) + return; + schedule_work(&host->handle_req); } @@ -441,6 +444,10 @@ static int rtsx_pci_ms_set_param(struct memstick_host *msh, dev_dbg(ms_dev(host), "%s: param = %d, value = %d\n", __func__, param, value); + err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_MS_CARD); + if (err) + return err; + switch (param) { case MEMSTICK_POWER: if (value == MEMSTICK_POWER_ON) diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 822237e322ba..481a98a10ecd 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -708,6 +708,25 @@ int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) } EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card) +{ + unsigned int cd_mask[] = { + [RTSX_SD_CARD] = SD_EXIST, + [RTSX_MS_CARD] = MS_EXIST + }; + + if (!pcr->ms_pmos) { + /* When using single PMOS, accessing card is not permitted + * if the existing card is not the designated one. + */ + if (pcr->card_exist & (~cd_mask[card])) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check); + int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) { if (pcr->ops->switch_output_voltage) @@ -784,6 +803,9 @@ static void rtsx_pci_card_detect(struct work_struct *work) card_inserted = pcr->ops->cd_deglitch(pcr); card_detect = card_inserted | card_removed; + + pcr->card_exist |= card_inserted; + pcr->card_exist &= ~card_removed; } mutex_unlock(&pcr->pcr_mutex); @@ -976,6 +998,14 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) return err; } + /* No CD interrupt if probing driver with card inserted. + * So we need to initialize pcr->card_exist here. + */ + if (pcr->ops->cd_deglitch) + pcr->card_exist = pcr->ops->cd_deglitch(pcr); + else + pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST; + return 0; } diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index f74b5adca642..468c92303167 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -678,12 +678,19 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) struct mmc_command *cmd = mrq->cmd; struct mmc_data *data = mrq->data; unsigned int data_size = 0; + int err; if (host->eject) { cmd->error = -ENOMEDIUM; goto finish; } + err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD); + if (err) { + cmd->error = err; + goto finish; + } + mutex_lock(&pcr->pcr_mutex); rtsx_pci_start_run(pcr); @@ -901,6 +908,9 @@ static void sdmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (host->eject) return; + if (rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD)) + return; + mutex_lock(&pcr->pcr_mutex); rtsx_pci_start_run(pcr); @@ -1073,6 +1083,10 @@ static int sdmmc_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios) if (host->eject) return -ENOMEDIUM; + err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD); + if (err) + return err; + mutex_lock(&pcr->pcr_mutex); rtsx_pci_start_run(pcr); @@ -1122,6 +1136,10 @@ static int sdmmc_execute_tuning(struct mmc_host *mmc, u32 opcode) if (host->eject) return -ENOMEDIUM; + err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD); + if (err) + return err; + mutex_lock(&pcr->pcr_mutex); rtsx_pci_start_run(pcr); diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 5d9b81e8aff4..26ea7f1b7caf 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -740,6 +740,7 @@ struct rtsx_pcr { unsigned int card_inserted; unsigned int card_removed; + unsigned int card_exist; struct delayed_work carddet_work; struct delayed_work idle_work; @@ -804,6 +805,7 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card); int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card); int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage); unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr); void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr); -- cgit v1.2.3 From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:12 +0000 Subject: bridge: Add netlink interface to configure vlans on bridge ports Add a netlink interface to add and remove vlan configuration on bridge port. The interface uses the RTM_SETLINK message and encodes the vlan configuration inside the IFLA_AF_SPEC. It is possble to include multiple vlans to either add or remove in a single message. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + include/uapi/linux/if_bridge.h | 9 +++ net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 139 +++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 1 + net/core/rtnetlink.c | 72 +++++++++++++++++++++ 7 files changed, 207 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 25bd46f52877..1b90f9401000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); }; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5db297514aec..3ca9817ca7e8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -108,15 +108,24 @@ struct __fdb_entry { * [IFLA_AF_SPEC] = { * [IFLA_BRIDGE_FLAGS] * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] * } */ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ + +struct bridge_vlan_info { + u16 flags; + u16 vid; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 35a2c2c84f33..091bedf266a0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index af9d65ab4001..335c60cebfd1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..534a9f4587a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -119,10 +120,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -144,6 +149,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ @@ -162,6 +168,64 @@ out: return err; } +const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid); + } else + err = br_vlan_add(br, vinfo->vid); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f0f24610d111..a42f9d49a64e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -713,6 +713,7 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c1e4db60eeca..2c9ccbfbd93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2464,6 +2464,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3 From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:13 +0000 Subject: bridge: Dump vlan information from a bridge port Using the RTM_GETLINK dump the vlan filter list of a given bridge port. The information depends on setting the filter flag similar to how nic VF info is dumped. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- include/linux/netdevice.h | 3 +- include/uapi/linux/rtnetlink.h | 1 + net/bridge/br_netlink.c | 94 +++++++++++++++++++++++---- net/bridge/br_private.h | 3 +- net/bridge/br_vlan.c | 2 + net/core/rtnetlink.c | 16 +++-- 7 files changed, 104 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6999269b3a4a..4e2aa47193cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, } static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, + u32 filter_mask) { struct ixgbe_adapter *adapter = netdev_priv(dev); u16 mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b90f9401000..1964ca66df56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1019,7 +1019,8 @@ struct net_device_ops { struct nlmsghdr *nlh); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, + u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a5eb196ade9..7a2144e1afae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -630,6 +630,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) /* End of information exported to user level */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 534a9f4587a9..fe1980d5a7e4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -154,16 +194,17 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } @@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -408,11 +472,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -421,5 +492,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a42f9d49a64e..ce2235255c2f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -73,6 +73,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d8690bfe63d4..f2bf5a197ea3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) } set_bit(vid, v->vlan_bitmap); + v->num_vlans++; return 0; } @@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) } clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c9ccbfbd93c..f3a112ec86d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } -- cgit v1.2.3 From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:18 +0000 Subject: bridge: Add vlan support to static neighbors When a user adds bridge neighbors, allow him to specify VLAN id. If the VLAN id is not specified, the neighbor will be added for VLANs currently in the ports filter list. If no VLANs are configured on the port, we use vlan 0 and only add 1 entry. Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan.c | 3 +- include/linux/netdevice.h | 4 +- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 148 ++++++++++++++++++++--- net/bridge/br_private.h | 6 +- net/core/rtnetlink.c | 26 ++-- 10 files changed, 162 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4e2aa47193cb..1c0efcb7920f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 937bcc3d3212..5088dc5c3d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int mlx4_en_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b745194391a1..b95316831587 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, - const unsigned char *addr) +static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *netdev, const unsigned char *addr) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e4b8078e88a9..defcd8a85744 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int macvlan_fdb_del(struct ndmsg *ndm, +static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 72485b9b9005..9d70421cf3a0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct vxlan_dev *vxlan = netdev_priv(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1964ca66df56..9deb672d999f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. - * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1008,6 +1009,7 @@ struct net_device_ops { const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 275e5d65dcb2..adb068c53c4e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -20,6 +20,7 @@ enum { NDA_LLADDR, NDA_CACHEINFO, NDA_PROBES, + NDA_VLAN, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 276a52254606..4b75ad43aa85 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr, 0); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags, - 0); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, 0); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22915c8e9961..799dbb37e5a2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br, const unsigned char *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) { - return rcu_dereference(br->vlan_info); + return rcu_dereference_rtnl(br->vlan_info); } static inline struct net_port_vlans *nbp_get_vlan_info( const struct net_bridge_port *p) { - return rcu_dereference(p->vlan_info); + return rcu_dereference_rtnl(p->vlan_info); } /* Since bridge now depends on 8021Q module, but the time bridge sees the diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f3a112ec86d5..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); -- cgit v1.2.3 From 3ebc21f7bc2f9c0145bbbf0f12430b766a200f9f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 31 Jan 2013 16:02:01 -0600 Subject: libceph: fix messenger CONFIG_BLOCK dependencies The ceph messenger has a few spots that are only used when bio messages are supported, and that's only when CONFIG_BLOCK is defined. This surrounds a couple of spots with #ifdef's that would cause a problem if CONFIG_BLOCK were not present in the kernel configuration. This resolves: http://tracker.ceph.com/issues/3976 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/messenger.h | 2 ++ net/ceph/messenger.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d68..8a62a559a2aa 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include #include #include +#ifdef CONFIG_BLOCK #include -#include +#endif /* CONFIG_BLOCK */ #include #include @@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; +#ifdef CONFIG_BLOCK m->bio = NULL; m->bio_iter = NULL; m->bio_seg = 0; +#endif /* CONFIG_BLOCK */ m->trail = NULL; /* front */ -- cgit v1.2.3 From 72fe25e3460c8673984370208e0e6261101372d6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 30 Jan 2013 11:13:33 -0600 Subject: libceph: add a compatibility check interface An upcoming change implements semantic change that could lead to a crash if an old version of the libceph kernel module is used with a new version of the rbd kernel module. In order to preclude that possibility, this adds a compatibilty check interface. If this interface doesn't exist, the modules are obviously not compatible. But if it does exist, this provides a way of letting the caller know whether it will operate properly with this libceph module. Perhaps confusingly, it returns false right now. The semantic change mentioned above will make it return true. This resolves: http://tracker.ceph.com/issues/3800 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 2 ++ net/ceph/ceph_common.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..c44275ab375c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ee71ea26777a..a98c03ff853f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -26,6 +26,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. + * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. + */ +bool libceph_compatible(void *data) +{ + return false; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) -- cgit v1.2.3 From 112202d9098aae2c36436e5178c0cf3ced423c7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Feb 2013 19:29:12 -0800 Subject: workqueue: rename cpu_workqueue to pool_workqueue workqueue has moved away from global_cwqs to worker_pools and with the scheduled custom worker pools, wforkqueues will be associated with pools which don't have anything to do with CPUs. The workqueue code went through significant amount of changes recently and mass renaming isn't likely to hurt much additionally. Let's replace 'cpu' with 'pool' so that it reflects the current design. * s/struct cpu_workqueue_struct/struct pool_workqueue/ * s/cpu_wq/pool_wq/ * s/cwq/pwq/ This patch is purely cosmetic. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 12 +- include/trace/events/workqueue.h | 10 +- kernel/workqueue.c | 433 +++++++++++++++++++-------------------- kernel/workqueue_internal.h | 2 +- 4 files changed, 228 insertions(+), 229 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a3d7556510c3..8afab27cdbc2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ - WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ @@ -40,7 +40,7 @@ enum { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, - WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, + WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, @@ -60,14 +60,14 @@ enum { WORK_CPU_END = NR_CPUS + 1, /* - * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 256 bytes and allows 15 - * workqueue flush colors. + * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. + * This makes pwqs aligned to 256 bytes and allows 15 workqueue + * flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, - /* data contains off-queue information when !WORK_STRUCT_CWQ */ + /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 4e798e384a6a..bf0e18ba6cfb 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work, /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu - * @cwq: pointer to struct cpu_workqueue_struct + * @pwq: pointer to struct pool_workqueue * @work: pointer to struct work_struct * * This event occurs when a work is queued immediately or once a @@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work, */ TRACE_EVENT(workqueue_queue_work, - TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, + TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), - TP_ARGS(req_cpu, cwq, work), + TP_ARGS(req_cpu, pwq, work), TP_STRUCT__entry( __field( void *, work ) @@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work, TP_fast_assign( __entry->work = work; __entry->function = work->func; - __entry->workqueue = cwq->wq; + __entry->workqueue = pwq->wq; __entry->req_cpu = req_cpu; - __entry->cpu = cwq->pool->cpu; + __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ea7f696f1060..0f1a264d0f22 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -154,11 +154,12 @@ struct worker_pool { } ____cacheline_aligned_in_smp; /* - * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of - * work_struct->data are used for flags and thus cwqs need to be - * aligned at two's power of the number of flag bits. + * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS + * of work_struct->data are used for flags and the remaining high bits + * point to the pwq; thus, pwqs need to be aligned at two's power of the + * number of flag bits. */ -struct cpu_workqueue_struct { +struct pool_workqueue { struct worker_pool *pool; /* I: the associated pool */ struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ @@ -207,16 +208,16 @@ typedef unsigned long mayday_mask_t; struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ union { - struct cpu_workqueue_struct __percpu *pcpu; - struct cpu_workqueue_struct *single; + struct pool_workqueue __percpu *pcpu; + struct pool_workqueue *single; unsigned long v; - } cpu_wq; /* I: cwq's */ + } pool_wq; /* I: pwq's */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ int work_color; /* F: current work color */ int flush_color; /* F: current flush color */ - atomic_t nr_cwqs_to_flush; /* flush in progress */ + atomic_t nr_pwqs_to_flush; /* flush in progress */ struct wq_flusher *first_flusher; /* F: first flusher */ struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ @@ -225,7 +226,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* W: drain in progress */ - int saved_max_active; /* W: saved cwq max_active */ + int saved_max_active; /* W: saved pwq max_active */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -268,7 +269,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, return WORK_CPU_END; } -static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, +static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, struct workqueue_struct *wq) { return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); @@ -284,7 +285,7 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, * * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND - * for_each_cwq_cpu() : possible CPUs for bound workqueues, + * for_each_pwq_cpu() : possible CPUs for bound workqueues, * WORK_CPU_UNBOUND for unbound workqueues */ #define for_each_wq_cpu(cpu) \ @@ -297,10 +298,10 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) -#define for_each_cwq_cpu(cpu, wq) \ - for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq)); \ +#define for_each_pwq_cpu(cpu, wq) \ + for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \ (cpu) < WORK_CPU_END; \ - (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq))) + (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -479,14 +480,14 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, - struct workqueue_struct *wq) +static struct pool_workqueue *get_pwq(unsigned int cpu, + struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); + return per_cpu_ptr(wq->pool_wq.pcpu, cpu); } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->cpu_wq.single; + return wq->pool_wq.single; return NULL; } @@ -507,18 +508,18 @@ static int work_next_color(int color) } /* - * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data - * contain the pointer to the queued cwq. Once execution starts, the flag + * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data + * contain the pointer to the queued pwq. Once execution starts, the flag * is cleared and the high bits contain OFFQ flags and pool ID. * - * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling() - * and clear_work_data() can be used to set the cwq, pool or clear + * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the pwq, pool or clear * work->data. These functions should only be called while the work is * owned - ie. while the PENDING bit is set. * - * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq + * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq * corresponding to a work. Pool is available once the work has been - * queued anywhere after initialization until it is sync canceled. cwq is + * queued anywhere after initialization until it is sync canceled. pwq is * available only while the work item is queued. * * %WORK_OFFQ_CANCELING is used to mark a work item which is being @@ -533,12 +534,11 @@ static inline void set_work_data(struct work_struct *work, unsigned long data, atomic_long_set(&work->data, data | flags | work_static(work)); } -static void set_work_cwq(struct work_struct *work, - struct cpu_workqueue_struct *cwq, +static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq, unsigned long extra_flags) { - set_work_data(work, (unsigned long)cwq, - WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); + set_work_data(work, (unsigned long)pwq, + WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags); } static void set_work_pool_and_keep_pending(struct work_struct *work, @@ -567,11 +567,11 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_POOL, 0); } -static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) +static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) + if (data & WORK_STRUCT_PWQ) return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); else return NULL; @@ -589,8 +589,8 @@ static struct worker_pool *get_work_pool(struct work_struct *work) struct worker_pool *pool; int pool_id; - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; @@ -613,8 +613,8 @@ static int get_work_pool_id(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; @@ -632,7 +632,7 @@ static bool work_is_canceling(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); + return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); } /* @@ -961,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_delayed_work(struct work_struct *work) +static void pwq_activate_delayed_work(struct work_struct *work) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct pool_workqueue *pwq = get_work_pwq(work); trace_workqueue_activate_work(work); - move_linked_works(work, &cwq->pool->worklist, NULL); + move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); - cwq->nr_active++; + pwq->nr_active++; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void pwq_activate_first_delayed(struct pool_workqueue *pwq) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct *work = list_first_entry(&pwq->delayed_works, struct work_struct, entry); - cwq_activate_delayed_work(work); + pwq_activate_delayed_work(work); } /** - * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight - * @cwq: cwq of interest + * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight + * @pwq: pwq of interest * @color: color of work which left the queue * * A work either has completed or is removed from pending queue, - * decrement nr_in_flight of its cwq and handle workqueue flushing. + * decrement nr_in_flight of its pwq and handle workqueue flushing. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; - cwq->nr_in_flight[color]--; + pwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { + pwq->nr_active--; + if (!list_empty(&pwq->delayed_works)) { /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); } /* is flush in progress and are we at the flushing tip? */ - if (likely(cwq->flush_color != color)) + if (likely(pwq->flush_color != color)) return; /* are there still in-flight works? */ - if (cwq->nr_in_flight[color]) + if (pwq->nr_in_flight[color]) return; - /* this cwq is done, clear flush_color */ - cwq->flush_color = -1; + /* this pwq is done, clear flush_color */ + pwq->flush_color = -1; /* - * If this was the last cwq, wake up the first flusher. It + * If this was the last pwq, wake up the first flusher. It * will handle the rest. */ - if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) - complete(&cwq->wq->first_flusher->done); + if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) + complete(&pwq->wq->first_flusher->done); } /** @@ -1053,7 +1053,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) { struct worker_pool *pool; - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; local_irq_save(*flags); @@ -1084,31 +1084,31 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, spin_lock(&pool->lock); /* - * work->data is guaranteed to point to cwq only while the work - * item is queued on cwq->wq, and both updating work->data to point - * to cwq on queueing and to pool on dequeueing are done under - * cwq->pool->lock. This in turn guarantees that, if work->data - * points to cwq which is associated with a locked pool, the work + * work->data is guaranteed to point to pwq only while the work + * item is queued on pwq->wq, and both updating work->data to point + * to pwq on queueing and to pool on dequeueing are done under + * pwq->pool->lock. This in turn guarantees that, if work->data + * points to pwq which is associated with a locked pool, the work * item is currently queued on that pool. */ - cwq = get_work_cwq(work); - if (cwq && cwq->pool == pool) { + pwq = get_work_pwq(work); + if (pwq && pwq->pool == pool) { debug_work_deactivate(work); /* * A delayed work item cannot be grabbed directly because * it might have linked NO_COLOR work items which, if left - * on the delayed_list, will confuse cwq->nr_active + * on the delayed_list, will confuse pwq->nr_active * management later on and cause stall. Make sure the work * item is activated before grabbing. */ if (*work_data_bits(work) & WORK_STRUCT_DELAYED) - cwq_activate_delayed_work(work); + pwq_activate_delayed_work(work); list_del_init(&work->entry); - cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work)); + pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work)); - /* work->data points to cwq iff queued, point to pool */ + /* work->data points to pwq iff queued, point to pool */ set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); @@ -1125,25 +1125,24 @@ fail: /** * insert_work - insert a work into a pool - * @cwq: cwq @work belongs to + * @pwq: pwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * - * Insert @work which belongs to @cwq after @head. @extra_flags is or'd to + * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to * work_struct flags. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void insert_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work, struct list_head *head, - unsigned int extra_flags) +static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, + struct list_head *head, unsigned int extra_flags) { - struct worker_pool *pool = cwq->pool; + struct worker_pool *pool = pwq->pool; /* we own @work, set data and link */ - set_work_cwq(work, cwq, extra_flags); + set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); /* @@ -1170,13 +1169,13 @@ static bool is_chained_work(struct workqueue_struct *wq) * Return %true iff I'm a worker execuing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ - return worker && worker->current_cwq->wq == wq; + return worker && worker->current_pwq->wq == wq; } static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; struct list_head *worklist; unsigned int work_flags; unsigned int req_cpu = cpu; @@ -1196,7 +1195,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; - /* determine the cwq to use */ + /* determine the pwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct worker_pool *last_pool; @@ -1209,54 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * work needs to be queued on that cpu to guarantee * non-reentrancy. */ - cwq = get_cwq(cpu, wq); + pwq = get_pwq(cpu, wq); last_pool = get_work_pool(work); - if (last_pool && last_pool != cwq->pool) { + if (last_pool && last_pool != pwq->pool) { struct worker *worker; spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); - if (worker && worker->current_cwq->wq == wq) { - cwq = get_cwq(last_pool->cpu, wq); + if (worker && worker->current_pwq->wq == wq) { + pwq = get_pwq(last_pool->cpu, wq); } else { /* meh... not running there, queue here */ spin_unlock(&last_pool->lock); - spin_lock(&cwq->pool->lock); + spin_lock(&pwq->pool->lock); } } else { - spin_lock(&cwq->pool->lock); + spin_lock(&pwq->pool->lock); } } else { - cwq = get_cwq(WORK_CPU_UNBOUND, wq); - spin_lock(&cwq->pool->lock); + pwq = get_pwq(WORK_CPU_UNBOUND, wq); + spin_lock(&pwq->pool->lock); } - /* cwq determined, queue */ - trace_workqueue_queue_work(req_cpu, cwq, work); + /* pwq determined, queue */ + trace_workqueue_queue_work(req_cpu, pwq, work); if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&cwq->pool->lock); + spin_unlock(&pwq->pool->lock); return; } - cwq->nr_in_flight[cwq->work_color]++; - work_flags = work_color_to_flags(cwq->work_color); + pwq->nr_in_flight[pwq->work_color]++; + work_flags = work_color_to_flags(pwq->work_color); - if (likely(cwq->nr_active < cwq->max_active)) { + if (likely(pwq->nr_active < pwq->max_active)) { trace_workqueue_activate_work(work); - cwq->nr_active++; - worklist = &cwq->pool->worklist; + pwq->nr_active++; + worklist = &pwq->pool->worklist; } else { work_flags |= WORK_STRUCT_DELAYED; - worklist = &cwq->delayed_works; + worklist = &pwq->delayed_works; } - insert_work(cwq, work, worklist, work_flags); + insert_work(pwq, work, worklist, work_flags); - spin_unlock(&cwq->pool->lock); + spin_unlock(&pwq->pool->lock); } /** @@ -1661,14 +1660,14 @@ static void rebind_workers(struct worker_pool *pool) /* * wq doesn't really matter but let's keep @worker->pool - * and @cwq->pool consistent for sanity. + * and @pwq->pool consistent for sanity. */ if (std_worker_pool_pri(worker->pool)) wq = system_highpri_wq; else wq = system_wq; - insert_work(get_cwq(pool->cpu, wq), rebind_work, + insert_work(get_pwq(pool->cpu, wq), rebind_work, worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } @@ -1845,15 +1844,15 @@ static void idle_worker_timeout(unsigned long __pool) static bool send_mayday(struct work_struct *work) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); - struct workqueue_struct *wq = cwq->wq; + struct pool_workqueue *pwq = get_work_pwq(work); + struct workqueue_struct *wq = pwq->wq; unsigned int cpu; if (!(wq->flags & WQ_RESCUER)) return false; /* mayday mayday mayday */ - cpu = cwq->pool->cpu; + cpu = pwq->pool->cpu; /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ if (cpu == WORK_CPU_UNBOUND) cpu = 0; @@ -2082,9 +2081,9 @@ static void process_one_work(struct worker *worker, struct work_struct *work) __releases(&pool->lock) __acquires(&pool->lock) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct pool_workqueue *pwq = get_work_pwq(work); struct worker_pool *pool = worker->pool; - bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; + bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE; int work_color; struct worker *collision; #ifdef CONFIG_LOCKDEP @@ -2125,7 +2124,7 @@ __acquires(&pool->lock) hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); worker->current_work = work; worker->current_func = work->func; - worker->current_cwq = cwq; + worker->current_pwq = pwq; work_color = get_work_color(work); list_del_init(&work->entry); @@ -2154,7 +2153,7 @@ __acquires(&pool->lock) spin_unlock_irq(&pool->lock); - lock_map_acquire_read(&cwq->wq->lockdep_map); + lock_map_acquire_read(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); worker->current_func(work); @@ -2164,7 +2163,7 @@ __acquires(&pool->lock) */ trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); + lock_map_release(&pwq->wq->lockdep_map); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" @@ -2185,8 +2184,8 @@ __acquires(&pool->lock) hash_del(&worker->hentry); worker->current_work = NULL; worker->current_func = NULL; - worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + worker->current_pwq = NULL; + pwq_dec_nr_in_flight(pwq, work_color); } /** @@ -2353,8 +2352,8 @@ repeat: */ for_each_mayday_cpu(cpu, wq->mayday_mask) { unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; - struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); - struct worker_pool *pool = cwq->pool; + struct pool_workqueue *pwq = get_pwq(tcpu, wq); + struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; __set_current_state(TASK_RUNNING); @@ -2370,7 +2369,7 @@ repeat: */ BUG_ON(!list_empty(&rescuer->scheduled)); list_for_each_entry_safe(work, n, &pool->worklist, entry) - if (get_work_cwq(work) == cwq) + if (get_work_pwq(work) == pwq) move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); @@ -2405,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work) /** * insert_wq_barrier - insert a barrier work - * @cwq: cwq to insert barrier into + * @pwq: pwq to insert barrier into * @barr: wq_barrier to insert * @target: target work to attach @barr to * @worker: worker currently executing @target, NULL if @target is not executing @@ -2422,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work) * after a work with LINKED flag set. * * Note that when @worker is non-NULL, @target may be modified - * underneath us, so we can't reliably determine cwq from @target. + * underneath us, so we can't reliably determine pwq from @target. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, +static void insert_wq_barrier(struct pool_workqueue *pwq, struct wq_barrier *barr, struct work_struct *target, struct worker *worker) { @@ -2460,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, } debug_work_activate(&barr->work); - insert_work(cwq, &barr->work, head, + insert_work(pwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR) | linked); } /** - * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing + * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing * @wq: workqueue being flushed * @flush_color: new flush color, < 0 for no-op * @work_color: new work color, < 0 for no-op * - * Prepare cwqs for workqueue flushing. + * Prepare pwqs for workqueue flushing. * - * If @flush_color is non-negative, flush_color on all cwqs should be - * -1. If no cwq has in-flight commands at the specified color, all - * cwq->flush_color's stay at -1 and %false is returned. If any cwq - * has in flight commands, its cwq->flush_color is set to - * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq + * If @flush_color is non-negative, flush_color on all pwqs should be + * -1. If no pwq has in-flight commands at the specified color, all + * pwq->flush_color's stay at -1 and %false is returned. If any pwq + * has in flight commands, its pwq->flush_color is set to + * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq * wakeup logic is armed and %true is returned. * * The caller should have initialized @wq->first_flusher prior to @@ -2484,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * @flush_color is negative, no flush color update is done and %false * is returned. * - * If @work_color is non-negative, all cwqs should have the same + * If @work_color is non-negative, all pwqs should have the same * work_color which is previous to @work_color and all will be * advanced to @work_color. * @@ -2495,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * %true if @flush_color >= 0 and there's something to flush. %false * otherwise. */ -static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, +static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, int flush_color, int work_color) { bool wait = false; unsigned int cpu; if (flush_color >= 0) { - BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); - atomic_set(&wq->nr_cwqs_to_flush, 1); + BUG_ON(atomic_read(&wq->nr_pwqs_to_flush)); + atomic_set(&wq->nr_pwqs_to_flush, 1); } - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct worker_pool *pool = cwq->pool; + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct worker_pool *pool = pwq->pool; spin_lock_irq(&pool->lock); if (flush_color >= 0) { - BUG_ON(cwq->flush_color != -1); + BUG_ON(pwq->flush_color != -1); - if (cwq->nr_in_flight[flush_color]) { - cwq->flush_color = flush_color; - atomic_inc(&wq->nr_cwqs_to_flush); + if (pwq->nr_in_flight[flush_color]) { + pwq->flush_color = flush_color; + atomic_inc(&wq->nr_pwqs_to_flush); wait = true; } } if (work_color >= 0) { - BUG_ON(work_color != work_next_color(cwq->work_color)); - cwq->work_color = work_color; + BUG_ON(work_color != work_next_color(pwq->work_color)); + pwq->work_color = work_color; } spin_unlock_irq(&pool->lock); } - if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) + if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); return wait; @@ -2581,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq) wq->first_flusher = &this_flusher; - if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, + if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, wq->work_color)) { /* nothing to flush, done */ wq->flush_color = next_color; @@ -2592,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq) /* wait in queue */ BUG_ON(wq->flush_color == this_flusher.flush_color); list_add_tail(&this_flusher.list, &wq->flusher_queue); - flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } } else { /* @@ -2659,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_splice_tail_init(&wq->flusher_overflow, &wq->flusher_queue); - flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } if (list_empty(&wq->flusher_queue)) { @@ -2669,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq) /* * Need to flush more colors. Make the next flusher - * the new first flusher and arm cwqs. + * the new first flusher and arm pwqs. */ BUG_ON(wq->flush_color == wq->work_color); BUG_ON(wq->flush_color != next->flush_color); @@ -2677,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_del_init(&next->list); wq->first_flusher = next; - if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) + if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) break; /* @@ -2720,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq) reflush: flush_workqueue(wq); - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); bool drained; - spin_lock_irq(&cwq->pool->lock); - drained = !cwq->nr_active && list_empty(&cwq->delayed_works); - spin_unlock_irq(&cwq->pool->lock); + spin_lock_irq(&pwq->pool->lock); + drained = !pwq->nr_active && list_empty(&pwq->delayed_works); + spin_unlock_irq(&pwq->pool->lock); if (drained) continue; @@ -2749,7 +2748,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) { struct worker *worker = NULL; struct worker_pool *pool; - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; might_sleep(); pool = get_work_pool(work); @@ -2758,18 +2757,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ - cwq = get_work_cwq(work); - if (cwq) { - if (unlikely(cwq->pool != pool)) + pwq = get_work_pwq(work); + if (pwq) { + if (unlikely(pwq->pool != pool)) goto already_gone; } else { worker = find_worker_executing_work(pool, work); if (!worker) goto already_gone; - cwq = worker->current_cwq; + pwq = worker->current_pwq; } - insert_wq_barrier(cwq, barr, work, worker); + insert_wq_barrier(pwq, barr, work, worker); spin_unlock_irq(&pool->lock); /* @@ -2778,11 +2777,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) * flusher is not running on the same workqueue by verifying write * access. */ - if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) - lock_map_acquire(&cwq->wq->lockdep_map); + if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER) + lock_map_acquire(&pwq->wq->lockdep_map); else - lock_map_acquire_read(&cwq->wq->lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); + lock_map_acquire_read(&pwq->wq->lockdep_map); + lock_map_release(&pwq->wq->lockdep_map); return true; already_gone: @@ -3092,46 +3091,46 @@ int keventd_up(void) return system_wq != NULL; } -static int alloc_cwqs(struct workqueue_struct *wq) +static int alloc_pwqs(struct workqueue_struct *wq) { /* - * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. + * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. * Make sure that the alignment isn't lower than that of * unsigned long long. */ - const size_t size = sizeof(struct cpu_workqueue_struct); + const size_t size = sizeof(struct pool_workqueue); const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)); if (!(wq->flags & WQ_UNBOUND)) - wq->cpu_wq.pcpu = __alloc_percpu(size, align); + wq->pool_wq.pcpu = __alloc_percpu(size, align); else { void *ptr; /* - * Allocate enough room to align cwq and put an extra + * Allocate enough room to align pwq and put an extra * pointer at the end pointing back to the originally * allocated pointer which will be used for free. */ ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); if (ptr) { - wq->cpu_wq.single = PTR_ALIGN(ptr, align); - *(void **)(wq->cpu_wq.single + 1) = ptr; + wq->pool_wq.single = PTR_ALIGN(ptr, align); + *(void **)(wq->pool_wq.single + 1) = ptr; } } /* just in case, make sure it's actually aligned */ - BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); - return wq->cpu_wq.v ? 0 : -ENOMEM; + BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align)); + return wq->pool_wq.v ? 0 : -ENOMEM; } -static void free_cwqs(struct workqueue_struct *wq) +static void free_pwqs(struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) - free_percpu(wq->cpu_wq.pcpu); - else if (wq->cpu_wq.single) { - /* the pointer to free is stored right after the cwq */ - kfree(*(void **)(wq->cpu_wq.single + 1)); + free_percpu(wq->pool_wq.pcpu); + else if (wq->pool_wq.single) { + /* the pointer to free is stored right after the pwq */ + kfree(*(void **)(wq->pool_wq.single + 1)); } } @@ -3185,25 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wq->flags = flags; wq->saved_max_active = max_active; mutex_init(&wq->flush_mutex); - atomic_set(&wq->nr_cwqs_to_flush, 0); + atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); - if (alloc_cwqs(wq) < 0) + if (alloc_pwqs(wq) < 0) goto err; - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); - BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); - cwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); - cwq->wq = wq; - cwq->flush_color = -1; - cwq->max_active = max_active; - INIT_LIST_HEAD(&cwq->delayed_works); + BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); + pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); + pwq->wq = wq; + pwq->flush_color = -1; + pwq->max_active = max_active; + INIT_LIST_HEAD(&pwq->delayed_works); } if (flags & WQ_RESCUER) { @@ -3234,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, spin_lock(&workqueue_lock); if (workqueue_freezing && wq->flags & WQ_FREEZABLE) - for_each_cwq_cpu(cpu, wq) - get_cwq(cpu, wq)->max_active = 0; + for_each_pwq_cpu(cpu, wq) + get_pwq(cpu, wq)->max_active = 0; list_add(&wq->list, &workqueues); @@ -3244,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, return wq; err: if (wq) { - free_cwqs(wq); + free_pwqs(wq); free_mayday_mask(wq->mayday_mask); kfree(wq->rescuer); kfree(wq); @@ -3275,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock(&workqueue_lock); /* sanity check */ - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); int i; for (i = 0; i < WORK_NR_COLORS; i++) - BUG_ON(cwq->nr_in_flight[i]); - BUG_ON(cwq->nr_active); - BUG_ON(!list_empty(&cwq->delayed_works)); + BUG_ON(pwq->nr_in_flight[i]); + BUG_ON(pwq->nr_active); + BUG_ON(!list_empty(&pwq->delayed_works)); } if (wq->flags & WQ_RESCUER) { @@ -3291,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq) kfree(wq->rescuer); } - free_cwqs(wq); + free_pwqs(wq); kfree(wq); } EXPORT_SYMBOL_GPL(destroy_workqueue); /** - * cwq_set_max_active - adjust max_active of a cwq - * @cwq: target cpu_workqueue_struct + * pwq_set_max_active - adjust max_active of a pwq + * @pwq: target pool_workqueue * @max_active: new max_active value. * - * Set @cwq->max_active to @max_active and activate delayed works if + * Set @pwq->max_active to @max_active and activate delayed works if * increased. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) +static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) { - cwq->max_active = max_active; + pwq->max_active = max_active; - while (!list_empty(&cwq->delayed_works) && - cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + while (!list_empty(&pwq->delayed_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); } /** @@ -3336,15 +3335,15 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->saved_max_active = max_active; - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct worker_pool *pool = cwq->pool; + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct worker_pool *pool = pwq->pool; spin_lock_irq(&pool->lock); if (!(wq->flags & WQ_FREEZABLE) || !(pool->flags & POOL_FREEZING)) - cwq_set_max_active(cwq, max_active); + pwq_set_max_active(pwq, max_active); spin_unlock_irq(&pool->lock); } @@ -3367,9 +3366,9 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); */ bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - return !list_empty(&cwq->delayed_works); + return !list_empty(&pwq->delayed_works); } EXPORT_SYMBOL_GPL(workqueue_congested); @@ -3408,7 +3407,7 @@ EXPORT_SYMBOL_GPL(work_busy); * CPU hotplug. * * There are two challenges in supporting CPU hotplug. Firstly, there - * are a lot of assumptions on strong associations among work, cwq and + * are a lot of assumptions on strong associations among work, pwq and * pool which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, * worker pools serve mix of short, long and very long running works making @@ -3612,11 +3611,11 @@ void freeze_workqueues_begin(void) pool->flags |= POOL_FREEZING; list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (cwq && cwq->pool == pool && + if (pwq && pwq->pool == pool && (wq->flags & WQ_FREEZABLE)) - cwq->max_active = 0; + pwq->max_active = 0; } spin_unlock_irq(&pool->lock); @@ -3655,13 +3654,13 @@ bool freeze_workqueues_busy(void) * to peek without lock. */ list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZABLE)) + if (!pwq || !(wq->flags & WQ_FREEZABLE)) continue; - BUG_ON(cwq->nr_active < 0); - if (cwq->nr_active) { + BUG_ON(pwq->nr_active < 0); + if (pwq->nr_active) { busy = true; goto out_unlock; } @@ -3701,14 +3700,14 @@ void thaw_workqueues(void) pool->flags &= ~POOL_FREEZING; list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (!cwq || cwq->pool != pool || + if (!pwq || pwq->pool != pool || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ - cwq_set_max_active(cwq, wq->saved_max_active); + pwq_set_max_active(pwq, wq->saved_max_active); } wake_up_worker(pool); diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 328be4a269aa..07650264ec15 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -28,7 +28,7 @@ struct worker { struct work_struct *current_work; /* L: work being processed */ work_func_t current_func; /* L: current_work's fn */ - struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ + struct pool_workqueue *current_pwq; /* L: current_work's pwq */ struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ -- cgit v1.2.3 From b2dc0c2b7a84864fdbad2e264ce9b5c7068a0a0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Feb 2013 23:08:18 +0100 Subject: ARM: s3c: i2c: add platform_device forward declaration A recent cleanup to the mach-osiris.c file is causing build errors because the i2c-s3c2410.h header file is included before we see the definition for platform_device. The fix is to make the header file more robust against inclusion from other places. While this should normally go through the i2c tree, the bug only exists in arm-soc at the moment, so it's easier to fix it there before it goes upstream. Without this patch, building s3c2410_defconfig results in: arch/arm/mach-s3c24xx/mach-osiris.c:34:0: include/linux/platform_data/i2c-s3c2410.h:37:26: warning: 'struct platform_device' declared inside parameter list [enabled by default] Signed-off-by: Arnd Bergmann Cc: linux-i2c@vger.kernel.org Cc: Wolfram Sang Cc: Ben Dooks Cc: Kukjin Kim --- include/linux/platform_data/i2c-s3c2410.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-s3c2410.h b/include/linux/platform_data/i2c-s3c2410.h index 51d52e767a19..2a50048c1c44 100644 --- a/include/linux/platform_data/i2c-s3c2410.h +++ b/include/linux/platform_data/i2c-s3c2410.h @@ -15,6 +15,8 @@ #define S3C_IICFLG_FILTER (1<<0) /* enable s3c2440 filter */ +struct platform_device; + /** * struct s3c2410_platform_i2c - Platform data for s3c I2C. * @bus_num: The bus number to use (if possible). -- cgit v1.2.3 From e9b04b5b67ec628a5e9a312e14b6864f8f73ba12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Nov 2012 11:14:10 -0500 Subject: make do_sigaltstack() static Signed-off-by: Al Viro --- include/linux/sched.h | 1 - kernel/signal.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8f983293b403..6dd06494997a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2259,7 +2259,6 @@ extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); -extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); static inline void restore_saved_sigmask(void) { diff --git a/kernel/signal.c b/kernel/signal.c index 87c09e3061d2..6919050c8156 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3129,7 +3129,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) return 0; } -int +static int do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp) { stack_t oss; -- cgit v1.2.3 From d64008a8f30e0b381b292788ec6f3ee509b3bb40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Nov 2012 23:12:10 -0500 Subject: burying unused conditionals __ARCH_WANT_SYS_RT_SIGACTION, __ARCH_WANT_SYS_RT_SIGSUSPEND, __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND, __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL - not used anymore CONFIG_GENERIC_{SIGALTSTACK,COMPAT_RT_SIG{ACTION,QUEUEINFO,PENDING,PROCMASK}} - can be assumed always set. --- arch/Kconfig | 15 --------------- arch/alpha/Kconfig | 1 - arch/alpha/include/asm/unistd.h | 1 - arch/arm/Kconfig | 1 - arch/arm/include/asm/unistd.h | 2 -- arch/arm64/Kconfig | 5 ----- arch/arm64/include/asm/unistd.h | 2 -- arch/avr32/Kconfig | 1 - arch/avr32/include/asm/unistd.h | 2 -- arch/blackfin/Kconfig | 1 - arch/blackfin/include/asm/unistd.h | 2 -- arch/c6x/Kconfig | 1 - arch/cris/Kconfig | 1 - arch/cris/include/asm/unistd.h | 2 -- arch/frv/Kconfig | 1 - arch/frv/include/asm/unistd.h | 2 -- arch/h8300/Kconfig | 1 - arch/h8300/include/asm/unistd.h | 2 -- arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/ia64/include/asm/unistd.h | 3 --- arch/m32r/Kconfig | 1 - arch/m32r/include/asm/unistd.h | 2 -- arch/m68k/Kconfig | 1 - arch/m68k/include/asm/unistd.h | 2 -- arch/microblaze/Kconfig | 1 - arch/microblaze/include/asm/unistd.h | 2 -- arch/mips/Kconfig | 5 ----- arch/mips/include/asm/unistd.h | 1 - arch/mn10300/Kconfig | 1 - arch/mn10300/include/asm/unistd.h | 2 -- arch/openrisc/Kconfig | 1 - arch/parisc/Kconfig | 5 ----- arch/parisc/include/asm/unistd.h | 3 --- arch/powerpc/Kconfig | 5 ----- arch/powerpc/include/asm/unistd.h | 4 ---- arch/s390/Kconfig | 5 ----- arch/s390/include/asm/unistd.h | 3 --- arch/score/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sh/include/asm/unistd.h | 2 -- arch/sparc/Kconfig | 5 ----- arch/sparc/include/asm/unistd.h | 3 --- arch/tile/Kconfig | 5 ----- arch/tile/include/asm/unistd.h | 1 - arch/unicore32/Kconfig | 1 - arch/x86/Kconfig | 4 ---- arch/x86/include/asm/unistd.h | 2 -- arch/x86/um/Kconfig | 1 - arch/xtensa/Kconfig | 1 - arch/xtensa/include/asm/unistd.h | 2 -- include/asm-generic/syscalls.h | 7 ------- include/asm-generic/unistd.h | 3 --- include/linux/compat.h | 14 -------------- include/linux/syscalls.h | 2 -- kernel/signal.c | 12 ------------ 56 files changed, 159 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index e50d3af294d4..956756c27ac6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -356,21 +356,6 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. -config GENERIC_SIGALTSTACK - bool - -config GENERIC_COMPAT_RT_SIGPROCMASK - bool - -config GENERIC_COMPAT_RT_SIGPENDING - bool - -config GENERIC_COMPAT_RT_SIGQUEUEINFO - bool - -config GENERIC_COMPAT_RT_SIGACTION - bool - # # ABI hall of shame # diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 15740cf29bd4..dd083c403ab3 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -22,7 +22,6 @@ config ALPHA select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK select ODD_RT_SIGACTION select OLD_SIGSUSPEND help diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index b3396ee039b7..6d6fe7ab5473 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -14,7 +14,6 @@ #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index db3152d6dd88..fcb406633328 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -56,7 +56,6 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select MODULES_USE_ELF_REL select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND3 select OLD_SIGACTION help diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 21a2700d2957..e4ddfb39ca34 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -26,8 +26,6 @@ #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1f27c58f44ad..626ab20f12ea 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -6,15 +6,10 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGPENDING - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGQUEUEINFO select GENERIC_HARDIRQS_NO_DEPRECATED select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW - select GENERIC_SIGALTSTACK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 744087fb521c..82ce217e94cf 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -20,10 +20,8 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e888b72b6e10..2ae6591b3a55 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -17,7 +17,6 @@ config AVR32 select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h index 0bdf6371574e..dc4d5a931112 100644 --- a/arch/avr32/include/asm/unistd.h +++ b/arch/avr32/include/asm/unistd.h @@ -37,8 +37,6 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_GETPGRP -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index a8a9ca7d40f3..b6f3ad5441c5 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -45,7 +45,6 @@ config BLACKFIN select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index e943cb130048..04e83ea8d5cc 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -18,8 +18,6 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_VFORK /* diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 12d97b7ef0dc..f6a3648f5ec3 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -18,7 +18,6 @@ config C6X select OF_EARLY_FLATTREE select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK config MMU def_bool n diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index c2a1d0a8924c..0e5c187ac7d2 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -50,7 +50,6 @@ config CRIS select GENERIC_CMOS_UPDATE select MODULES_USE_ELF_RELA select CLONE_BACKWARDS2 - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND select OLD_SIGACTION diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index 6d062bdf92d4..be57a988bfb9 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -30,8 +30,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index e3f8ffdd4e7b..b7465cd3dbbb 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -12,7 +12,6 @@ config FRV select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CPU_DEVICES select ARCH_WANT_IPC_PARSE_VERSION - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index d685da17f5fb..4cfcc7bba25a 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -27,8 +27,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT /* #define __ARCH_WANT_SYS_SIGPENDING */ #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 0b0176ce2c35..05b613af223a 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -9,7 +9,6 @@ config H8300 select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index aa38105959fb..6721856d841b 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -29,8 +29,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 3e6e27c11f93..0744f7d7b1fd 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,7 +31,6 @@ config HEXAGON select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 98482d1cbc5b..3279646120e3 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -42,7 +42,6 @@ config IA64 select GENERIC_TIME_VSYSCALL_OLD select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index c827049eb62c..096373800f73 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -27,9 +27,6 @@ #define __IGNORE_vfork /* clone() */ #define __IGNORE_umount2 /* umount() */ -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND - #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) #include diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 1f550d4dd5d0..f807721e19a5 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -15,7 +15,6 @@ config M32R select GENERIC_ATOMIC64 select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK config SBUS bool diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index 79b063caec85..555629b05267 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -20,8 +20,6 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_OLD_GETRLIMIT /*will be unused*/ #define __ARCH_WANT_SYS_OLDUMOUNT -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index a358bf63defe..efb1ce1f14a3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -18,7 +18,6 @@ config M68K select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 847994ce6804..df19631cc4de 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -29,8 +29,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 5e30d75c74ed..ba3b7c8c04b8 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -27,7 +27,6 @@ config MICROBLAZE select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK config SWAP def_bool n diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 10f8ac186855..b3778391d9cc 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -33,8 +33,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 0772b5c5bc72..a3d4646098fe 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -42,11 +42,6 @@ config MIPS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING menu "Machine selection" diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 06f6463c24ad..64f661e32879 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -35,7 +35,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION # ifdef CONFIG_32BIT # define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 12bf06f9abe5..ad0caea0bfea 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -10,7 +10,6 @@ config MN10300 select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index e6d2ed4ba68f..7f9d9adfa51e 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -41,8 +41,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index d3632eb98a1c..0ac66f67521f 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -22,7 +22,6 @@ config OPENRISC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 2bd407ffaebf..b77feffbadea 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -23,11 +23,6 @@ config PARISC select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 3043194547cd..93b1d089864b 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -160,9 +160,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ec89a7b11f7b..cf90f0526411 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -144,11 +144,6 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND select OLD_SIGACTION if PPC32 diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 1d4864a40e35..f25b5c45c435 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -44,17 +44,13 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_COMPAT_SYS_TIME -#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_NEWFSTATAT #define __ARCH_WANT_COMPAT_SYS_SENDFILE -#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bcdcf31fa672..ec12a3582ae9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,11 +140,6 @@ config S390 select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS2 - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND3 select OLD_SIGACTION diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 636530872516..a6667a952969 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -43,15 +43,12 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND # ifndef CONFIG_64BIT # define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME # endif # ifdef CONFIG_COMPAT # define __ARCH_WANT_COMPAT_SYS_TIME -# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND # endif #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/score/Kconfig b/arch/score/Kconfig index a125d7207bcc..3b1482e7afac 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -14,7 +14,6 @@ config SCORE select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL select CLONE_BACKWARDS - select GENERIC_SIGALTSTACK choice prompt "System type" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b5fd9f3c9925..5f9b0a3f5f00 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -40,7 +40,6 @@ config SUPERH select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK select OLD_SIGSUSPEND select OLD_SIGACTION help diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h index 012004ed3330..5e90fa2b7eed 100644 --- a/arch/sh/include/asm/unistd.h +++ b/arch/sh/include/asm/unistd.h @@ -4,7 +4,6 @@ # include # endif -# define __ARCH_WANT_SYS_RT_SIGSUSPEND # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT # define __ARCH_WANT_STAT64 @@ -27,7 +26,6 @@ # define __ARCH_WANT_SYS_OLDUMOUNT # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK -# define __ARCH_WANT_SYS_RT_SIGACTION # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e557b0821540..9821a0ff9864 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,11 +42,6 @@ config SPARC select GENERIC_STRNLEN_USER select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING - select GENERIC_COMPAT_RT_SIGACTION select OLD_SIGSUSPEND config SPARC32 diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 87ce24c5eb95..5356810bd7e7 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -38,14 +38,11 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #ifdef __32bit_syscall_numbers__ #define __ARCH_WANT_SYS_IPC #else #define __ARCH_WANT_COMPAT_SYS_TIME -#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_COMPAT_SYS_SENDFILE -#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif /* diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 96a717ebb1fa..875d008828b8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -21,11 +21,6 @@ config TILE select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPROCMASK - select GENERIC_COMPAT_RT_SIGPENDING # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index 6ac21034f69a..940831fe9e94 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -14,7 +14,6 @@ /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */ #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK -#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_NEWFSTATAT #define __ARCH_WANT_SYS_CLONE diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index a62786fdcaab..60651df5f952 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -16,7 +16,6 @@ config UNICORE32 select ARCH_WANT_FRAME_POINTERS select GENERIC_IOMAP select MODULES_USE_ELF_REL - select GENERIC_SIGALTSTACK help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 87d09175a0a9..49fb44e95f3c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -113,10 +113,6 @@ config X86 select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index a0790e07ba65..3d5df1c4447f 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -38,8 +38,6 @@ # define __ARCH_WANT_SYS_OLD_GETRLIMIT # define __ARCH_WANT_SYS_OLD_UNAME # define __ARCH_WANT_SYS_PAUSE -# define __ARCH_WANT_SYS_RT_SIGACTION -# define __ARCH_WANT_SYS_RT_SIGSUSPEND # define __ARCH_WANT_SYS_SGETMASK # define __ARCH_WANT_SYS_SIGNAL # define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index cf0f2731484e..fafc94193bc8 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -13,7 +13,6 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT - select GENERIC_SIGALTSTACK config 64BIT bool "64-bit kernel" if SUBARCH = "x86" diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 23cc6ae35da0..5aab1acabf1c 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -16,7 +16,6 @@ config XTENSA select ARCH_WANT_OPTIONAL_GPIOLIB select CLONE_BACKWARDS select IRQ_DOMAIN - select GENERIC_SIGALTSTACK help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index eb63ea87815c..c38834de9ac7 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -15,8 +15,6 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_LLSEEK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_GETPGRP /* diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 9e25a3179d6c..1f74be5113b2 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -21,13 +21,6 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long fd, off_t pgoff); #endif -#ifndef CONFIG_GENERIC_SIGALTSTACK -#ifndef sys_sigaltstack -asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, - struct pt_regs *); -#endif -#endif - #ifndef sys_rt_sigreturn asmlinkage long sys_rt_sigreturn(struct pt_regs *regs); #endif diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index a36991ab334e..257c55ec4f77 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -9,9 +9,6 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_LLSEEK #endif -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND /* * "Conditional" syscalls diff --git a/include/linux/compat.h b/include/linux/compat.h index 8de903587fb9..de095b0462a7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -68,7 +68,6 @@ #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() #endif -#ifdef CONFIG_GENERIC_SIGALTSTACK #ifndef compat_sigaltstack /* we'll need that for MIPS */ typedef struct compat_sigaltstack { compat_uptr_t ss_sp; @@ -76,7 +75,6 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif -#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) @@ -142,7 +140,6 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION struct compat_sigaction { #ifndef __ARCH_HAS_ODD_SIGACTION compat_uptr_t sa_handler; @@ -156,7 +153,6 @@ struct compat_sigaction { #endif compat_sigset_t sa_mask __packed; }; -#endif /* * These functions operate strictly on struct compat_time* @@ -623,27 +619,19 @@ asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, struct compat_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize); -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, compat_sigset_t __user *oset, compat_size_t sigsetsize); -#endif -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, compat_size_t sigsetsize); -#endif #ifndef CONFIG_ODD_RT_SIGACTION -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION asmlinkage long compat_sys_rt_sigaction(int, const struct compat_sigaction __user *, struct compat_sigaction __user *, compat_size_t); #endif -#endif -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); -#endif asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); @@ -694,13 +682,11 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); -#ifdef CONFIG_GENERIC_SIGALTSTACK asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); -#endif asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 66d298f69f98..313a8e0a6553 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -300,10 +300,8 @@ asmlinkage long sys_personality(unsigned int personality); asmlinkage long sys_sigpending(old_sigset_t __user *set); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); -#ifdef CONFIG_GENERIC_SIGALTSTACK asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, struct sigaltstack __user *uoss); -#endif asmlinkage long sys_getitimer(int which, struct itimerval __user *value); asmlinkage long sys_setitimer(int which, diff --git a/kernel/signal.c b/kernel/signal.c index 6919050c8156..2b8282bb487c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2623,7 +2623,6 @@ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, } #ifdef CONFIG_COMPAT -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, compat_sigset_t __user *, oset, compat_size_t, sigsetsize) { @@ -2661,7 +2660,6 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, #endif } #endif -#endif static int do_sigpending(void *set, unsigned long sigsetsize) { @@ -2694,7 +2692,6 @@ SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) } #ifdef CONFIG_COMPAT -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, compat_size_t, sigsetsize) { @@ -2714,7 +2711,6 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, #endif } #endif -#endif #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER @@ -3024,7 +3020,6 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, } #ifdef CONFIG_COMPAT -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, compat_pid_t, pid, int, sig, @@ -3037,7 +3032,6 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, return do_rt_sigqueueinfo(pid, sig, &info); } #endif -#endif static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) { @@ -3194,12 +3188,10 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s out: return error; } -#ifdef CONFIG_GENERIC_SIGALTSTACK SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) { return do_sigaltstack(uss, uoss, current_user_stack_pointer()); } -#endif int restore_altstack(const stack_t __user *uss) { @@ -3217,7 +3209,6 @@ int __save_altstack(stack_t __user *uss, unsigned long sp) } #ifdef CONFIG_COMPAT -#ifdef CONFIG_GENERIC_SIGALTSTACK COMPAT_SYSCALL_DEFINE2(sigaltstack, const compat_stack_t __user *, uss_ptr, compat_stack_t __user *, uoss_ptr) @@ -3267,7 +3258,6 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) __put_user(t->sas_ss_size, &uss->ss_size); } #endif -#endif #ifdef __ARCH_WANT_SYS_SIGPENDING @@ -3368,7 +3358,6 @@ out: return ret; } #ifdef CONFIG_COMPAT -#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, const struct compat_sigaction __user *, act, struct compat_sigaction __user *, oact, @@ -3415,7 +3404,6 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, return ret; } #endif -#endif #endif /* !CONFIG_ODD_RT_SIGACTION */ #ifdef CONFIG_OLD_SIGACTION -- cgit v1.2.3 From 7d7e499f7333f68b7e7f67d14b9c1480913b4afb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Jan 2013 12:11:12 +0000 Subject: smpboot: Allow selfparking per cpu threads The stop machine threads are still killed when a cpu goes offline. The reason is that the thread is used to bring the cpu down, so it can't be parked along with the other per cpu threads. Allow a per cpu thread to be excluded from automatic parking, so it can park itself once it's done Add a create callback function as well. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Rusty Russell Cc: Paul McKenney Cc: Srivatsa S. Bhat Cc: Arjan van de Veen Cc: Paul Turner Cc: Richard Weinberger Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130131120741.553993267@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/smpboot.h | 5 +++++ kernel/smpboot.c | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e0106d8581d3..c65dee059913 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -14,6 +14,8 @@ struct smpboot_thread_data; * @thread_should_run: Check whether the thread should run or not. Called with * preemption disabled. * @thread_fn: The associated thread function + * @create: Optional setup function, called when the thread gets + * created (Not called from the thread context) * @setup: Optional setup function, called when the thread gets * operational the first time * @cleanup: Optional cleanup function, called when the thread @@ -22,6 +24,7 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ struct smp_hotplug_thread { @@ -29,10 +32,12 @@ struct smp_hotplug_thread { struct list_head list; int (*thread_should_run)(unsigned int cpu); void (*thread_fn)(unsigned int cpu); + void (*create)(unsigned int cpu); void (*setup)(unsigned int cpu); void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + bool selfparking; const char *thread_comm; }; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d6c5fc054242..d4abac261779 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -183,9 +183,10 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) kfree(td); return PTR_ERR(tsk); } - get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; + if (ht->create) + ht->create(cpu); return 0; } @@ -225,7 +226,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); - if (tsk) + if (tsk && !ht->selfparking) kthread_park(tsk); } -- cgit v1.2.3 From 978c4172af48f0adc082f8b1d94acb817d947730 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 14 Feb 2013 11:00:16 +0200 Subject: dmaengine.h: remove redundant else keyword dmaengine_device_control returns -ENOSYS in case the dma driver doesn't have such functionality. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index bfcdecb5d87a..f5939999cb65 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -610,8 +610,8 @@ static inline int dmaengine_device_control(struct dma_chan *chan, { if (chan->device->device_control) return chan->device->device_control(chan, cmd, arg); - else - return -ENOSYS; + + return -ENOSYS; } static inline int dmaengine_slave_config(struct dma_chan *chan, -- cgit v1.2.3 From abf09bed3cceadd809f0356065c2ada6cee90d4a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 Nov 2012 13:17:37 +0100 Subject: s390/mm: implement software dirty bits The s390 architecture is unique in respect to dirty page detection, it uses the change bit in the per-page storage key to track page modifications. All other architectures track dirty bits by means of page table entries. This property of s390 has caused numerous problems in the past, e.g. see git commit ef5d437f71afdf4a "mm: fix XFS oops due to dirty pages without buffers on s390". To avoid future issues in regard to per-page dirty bits convert s390 to a fault based software dirty bit detection mechanism. All user page table entries which are marked as clean will be hardware read-only, even if the pte is supposed to be writable. A write by the user process will trigger a protection fault which will cause the user pte to be marked as dirty and the hardware read-only bit is removed. With this change the dirty bit in the storage key is irrelevant for Linux as a host, but the storage key is still required for KVM guests. The effect is that page_test_and_clear_dirty and the related code can be removed. The referenced bit in the storage key is still used by the page_test_and_clear_young primitive to provide page age information. For page cache pages of mappings with mapping_cap_account_dirty there will not be any change in behavior as the dirty bit tracking already uses read-only ptes to control the amount of dirty pages. Only for swap cache pages and pages of mappings without mapping_cap_account_dirty there can be additional protection faults. To avoid an excessive number of additional faults the mk_pte primitive checks for PageDirty if the pgprot value allows for writes and pre-dirties the pte. That avoids all additional faults for tmpfs and shmem pages until these pages are added to the swap cache. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 22 ------- arch/s390/include/asm/pgtable.h | 131 +++++++++++++++++++++++++++------------- arch/s390/include/asm/sclp.h | 1 - arch/s390/include/asm/setup.h | 16 ++--- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/lib/uaccess_pt.c | 2 +- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/vmem.c | 24 +++----- drivers/s390/char/sclp_cmd.c | 10 +-- include/asm-generic/pgtable.h | 10 --- include/linux/page-flags.h | 8 --- mm/rmap.c | 24 -------- 12 files changed, 112 insertions(+), 140 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a86ad4084073..75ce9b065f9f 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -154,28 +154,6 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - * - * Note that the bit gets set whenever page content is changed. That means - * also when the page is modified by DMA or from inside the kernel. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY -static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) -{ - unsigned char skey; - - skey = page_get_storage_key(pfn << PAGE_SHIFT); - if (!(skey & _PAGE_CHANGED)) - return 0; - page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); - return 1; -} - /* * Test and clear referenced bit in storage key. */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a009d4dd70cb..97de1200c849 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -29,6 +29,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #include #include @@ -221,13 +222,15 @@ extern unsigned long MODULES_END; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ -#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ -#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ +#define _PAGE_SWC 0x004 /* SW pte changed bit */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit */ +#define _PAGE_SWW 0x010 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ + _PAGE_SWC | _PAGE_SWR) /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -321,6 +324,7 @@ extern unsigned long MODULES_END; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ +#define _REGION_ENTRY_RO 0x200 /* region protection bit */ #define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ @@ -382,9 +386,10 @@ extern unsigned long MODULES_END; */ #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) #define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RW) +#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) +#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) -#define PAGE_KERNEL PAGE_RW +#define PAGE_KERNEL PAGE_RWC #define PAGE_SHARED PAGE_KERNEL #define PAGE_COPY PAGE_RO @@ -632,23 +637,23 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ if (bits & _PAGE_CHANGED) - page_set_storage_key(address, skey ^ bits, 1); + page_set_storage_key(address, skey ^ bits, 0); else if (bits) page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; - /* Clear host bits in pgste. */ + /* Transfer page changed & referenced bit to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Clear relevant host bits in pgste. */ pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ pgste_val(pgste) |= (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - /* Transfer changed and referenced to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ - /* Transfer changed & referenced to pte sofware bits */ - pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ + /* Transfer referenced bit to pte */ + pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1; #endif return pgste; @@ -661,20 +666,25 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) if (!pte_present(*ptep)) return pgste; + /* Get referenced bit from storage key */ young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); - /* Transfer page referenced bit to pte software bit (host view) */ - if (young || (pgste_val(pgste) & RCP_HR_BIT)) + if (young) + pgste_val(pgste) |= RCP_GR_BIT; + /* Get host referenced bit from pgste */ + if (pgste_val(pgste) & RCP_HR_BIT) { + pgste_val(pgste) &= ~RCP_HR_BIT; + young = 1; + } + /* Transfer referenced bit to kvm user bits and pte */ + if (young) { + pgste_val(pgste) |= KVM_UR_BIT; pte_val(*ptep) |= _PAGE_SWR; - /* Clear host referenced bit in pgste. */ - pgste_val(pgste) &= ~RCP_HR_BIT; - /* Transfer page referenced bit to guest bit in pgste */ - pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ + } #endif return pgste; - } -static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; @@ -688,10 +698,23 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) /* Set page access key and fetch protection bit from pgste */ nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; if (okey != nkey) - page_set_storage_key(address, nkey, 1); + page_set_storage_key(address, nkey, 0); #endif } +static inline void pgste_set_pte(pte_t *ptep, pte_t entry) +{ + if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_SWC; + pte_val(entry) &= ~_PAGE_RO; + } + *ptep = entry; +} + /** * struct gmap_struct - guest address space * @mm: pointer to the parent mm_struct @@ -750,11 +773,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_set_pte(ptep, pgste, entry); - *ptep = entry; + pgste_set_key(ptep, pgste, entry); + pgste_set_pte(ptep, entry); pgste_set_unlock(ptep, pgste); - } else + } else { + if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) + pte_val(entry) |= _PAGE_CO; *ptep = entry; + } } /* @@ -763,16 +789,12 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_RO) == 0; + return (pte_val(pte) & _PAGE_SWW) != 0; } static inline int pte_dirty(pte_t pte) { -#ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_SWC) - return 1; -#endif - return 0; + return (pte_val(pte) & _PAGE_SWC) != 0; } static inline int pte_young(pte_t pte) @@ -822,11 +844,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); + if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) + pte_val(pte) &= ~_PAGE_RO; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { + pte_val(pte) &= ~_PAGE_SWW; /* Do not clobber _PAGE_TYPE_NONE pages! */ if (!(pte_val(pte) & _PAGE_INVALID)) pte_val(pte) |= _PAGE_RO; @@ -835,20 +860,26 @@ static inline pte_t pte_wrprotect(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_SWW; + if (pte_val(pte) & _PAGE_SWC) + pte_val(pte) &= ~_PAGE_RO; return pte; } static inline pte_t pte_mkclean(pte_t pte) { -#ifdef CONFIG_PGSTE pte_val(pte) &= ~_PAGE_SWC; -#endif + /* Do not clobber _PAGE_TYPE_NONE pages! */ + if (!(pte_val(pte) & _PAGE_INVALID)) + pte_val(pte) |= _PAGE_RO; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { + pte_val(pte) |= _PAGE_SWC; + if (pte_val(pte) & _PAGE_SWW) + pte_val(pte) &= ~_PAGE_RO; return pte; } @@ -886,10 +917,10 @@ static inline pte_t pte_mkhuge(pte_t pte) pte_val(pte) |= _SEGMENT_ENTRY_INV; } /* - * Clear SW pte bits SWT and SWX, there are no SW bits in a segment - * table entry. + * Clear SW pte bits, there are no SW bits in a segment table entry. */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); + pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC | + _PAGE_SWR | _PAGE_SWW); /* * Also set the change-override bit because we don't need dirty bit * tracking for hugetlbfs pages. @@ -1041,9 +1072,11 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long address, pte_t *ptep, pte_t pte) { - *ptep = pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { + pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); + } else + *ptep = pte; } #define __HAVE_ARCH_PTEP_CLEAR_FLUSH @@ -1111,10 +1144,13 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); - *ptep = pte_wrprotect(pte); + pte = pte_wrprotect(pte); - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { + pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; } return pte; } @@ -1132,10 +1168,12 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, pgste = pgste_get_lock(ptep); __ptep_ipte(address, ptep); - *ptep = entry; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { + pgste_set_pte(ptep, entry); pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; return 1; } @@ -1153,8 +1191,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) { unsigned long physpage = page_to_phys(page); + pte_t __pte = mk_pte_phys(physpage, pgprot); - return mk_pte_phys(physpage, pgprot); + if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { + pte_val(__pte) |= _PAGE_SWC; + pte_val(__pte) &= ~_PAGE_RO; + } + return __pte; } #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -1246,6 +1289,8 @@ static inline int pmd_trans_splitting(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 833788693f09..06a136136047 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -46,7 +46,6 @@ int sclp_cpu_deconfigure(u8 cpu); void sclp_facilities_detect(void); unsigned long long sclp_get_rnmax(void); unsigned long long sclp_get_rzm(void); -u8 sclp_get_fac85(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index f69f76b3447a..f6857516e523 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -64,13 +64,14 @@ extern unsigned int s390_user_mode; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) -#define MACHINE_FLAG_CSP (1UL << 3) -#define MACHINE_FLAG_MVPG (1UL << 4) -#define MACHINE_FLAG_DIAG44 (1UL << 5) -#define MACHINE_FLAG_IDTE (1UL << 6) -#define MACHINE_FLAG_DIAG9C (1UL << 7) -#define MACHINE_FLAG_MVCOS (1UL << 8) -#define MACHINE_FLAG_KVM (1UL << 9) +#define MACHINE_FLAG_CSP (1UL << 2) +#define MACHINE_FLAG_MVPG (1UL << 3) +#define MACHINE_FLAG_DIAG44 (1UL << 4) +#define MACHINE_FLAG_IDTE (1UL << 5) +#define MACHINE_FLAG_DIAG9C (1UL << 6) +#define MACHINE_FLAG_MVCOS (1UL << 7) +#define MACHINE_FLAG_KVM (1UL << 8) +#define MACHINE_FLAG_ESOP (1UL << 9) #define MACHINE_FLAG_EDAT1 (1UL << 10) #define MACHINE_FLAG_EDAT2 (1UL << 11) #define MACHINE_FLAG_LPAR (1UL << 12) @@ -84,6 +85,7 @@ extern unsigned int s390_user_mode; #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) +#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) #define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1 #define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f090e819bf71..2923781590a6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -147,7 +147,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_S390_COW: - r = sclp_get_fac85() & 0x2; + r = MACHINE_HAS_ESOP; break; default: r = 0; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 9017a63dda3d..a70ee84c0241 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -50,7 +50,7 @@ static __always_inline unsigned long follow_table(struct mm_struct *mm, ptep = pte_offset_map(pmd, addr); if (!pte_present(*ptep)) return -0x11UL; - if (write && !pte_write(*ptep)) + if (write && (!pte_write(*ptep) || !pte_dirty(*ptep))) return -0x04UL; return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 29ccee3651f4..d21040ed5e59 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -127,7 +127,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pte_val(*pte) = _PAGE_TYPE_EMPTY; continue; } - *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); + pte_val(*pte) = __pa(address); } } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6ed1426d27c5..79699f46a443 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -85,11 +85,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; int ret = -ENOMEM; while (address < end) { - pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { pu_dir = vmem_pud_alloc(); @@ -101,9 +99,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { - pte_val(pte) |= _REGION3_ENTRY_LARGE; - pte_val(pte) |= _REGION_ENTRY_TYPE_R3; - pud_val(*pu_dir) = pte_val(pte); + pud_val(*pu_dir) = __pa(address) | + _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | + (ro ? _REGION_ENTRY_RO : 0); address += PUD_SIZE; continue; } @@ -118,8 +116,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { - pte_val(pte) |= _SEGMENT_ENTRY_LARGE; - pmd_val(*pm_dir) = pte_val(pte); + pmd_val(*pm_dir) = __pa(address) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + (ro ? _SEGMENT_ENTRY_RO : 0); address += PMD_SIZE; continue; } @@ -132,7 +131,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); address += PAGE_SIZE; } ret = 0; @@ -199,7 +198,6 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; int ret = -ENOMEM; start_addr = (unsigned long) start; @@ -237,9 +235,8 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) new_page = vmemmap_alloc_block(PMD_SIZE, node); if (!new_page) goto out; - pte = mk_pte_phys(__pa(new_page), PAGE_RW); - pte_val(pte) |= _SEGMENT_ENTRY_LARGE; - pmd_val(*pm_dir) = pte_val(pte); + pmd_val(*pm_dir) = __pa(new_page) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE; address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -260,8 +257,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); - *pt_dir = pte; + pte_val(*pt_dir) = __pa(new_page); } address += PAGE_SIZE; } diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index c44d13f607bc..30a2255389e5 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -56,7 +56,6 @@ static int __initdata early_read_info_sccb_valid; u64 sclp_facilities; static u8 sclp_fac84; -static u8 sclp_fac85; static unsigned long long rzm; static unsigned long long rnmax; @@ -131,7 +130,8 @@ void __init sclp_facilities_detect(void) sccb = &early_read_info_sccb; sclp_facilities = sccb->facilities; sclp_fac84 = sccb->fac84; - sclp_fac85 = sccb->fac85; + if (sccb->fac85 & 0x02) + S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; rzm <<= 20; @@ -171,12 +171,6 @@ unsigned long long sclp_get_rzm(void) return rzm; } -u8 sclp_get_fac85(void) -{ - return sclp_fac85; -} -EXPORT_SYMBOL_GPL(sclp_get_fac85); - /* * This function will be called after sclp_facilities_detect(), which gets * called from early.c code. Therefore the sccb should have valid contents. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 5cf680a98f9b..bfd87685fc1f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -197,16 +197,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY -#define page_test_and_clear_dirty(pfn, mapped) (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY -#define pte_maybe_dirty(pte) pte_dirty(pte) -#else -#define pte_maybe_dirty(pte) (1) -#endif - #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG #define page_test_and_clear_young(pfn) (0) #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 70473da47b3f..6d53675c2b54 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -303,21 +303,13 @@ static inline void __SetPageUptodate(struct page *page) static inline void SetPageUptodate(struct page *page) { -#ifdef CONFIG_S390 - if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0); -#else /* * Memory barrier must be issued before setting the PG_uptodate bit, * so that all previous stores issued in order to bring the page * uptodate are actually visible before PageUptodate becomes true. - * - * s390 doesn't need an explicit smp_wmb here because the test and - * set bit already provides full barriers. */ smp_wmb(); set_bit(PG_uptodate, &(page)->flags); -#endif } CLEARPAGEFLAG(Uptodate, uptodate) diff --git a/mm/rmap.c b/mm/rmap.c index 2c78f8cadc95..3d38edffda41 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1126,7 +1126,6 @@ void page_add_file_rmap(struct page *page) */ void page_remove_rmap(struct page *page) { - struct address_space *mapping = page_mapping(page); bool anon = PageAnon(page); bool locked; unsigned long flags; @@ -1143,29 +1142,6 @@ void page_remove_rmap(struct page *page) if (!atomic_add_negative(-1, &page->_mapcount)) goto out; - /* - * Now that the last pte has gone, s390 must transfer dirty - * flag from storage key to struct page. We can usually skip - * this if the page is anon, so about to be freed; but perhaps - * not if it's in swapcache - there might be another pte slot - * containing the swap entry, but page not yet written to swap. - * - * And we can skip it on file pages, so long as the filesystem - * participates in dirty tracking (note that this is not only an - * optimization but also solves problems caused by dirty flag in - * storage key getting set by a write from inside kernel); but need to - * catch shm and tmpfs and ramfs pages which have been modified since - * creation by read fault. - * - * Note that mapping must be decided above, before decrementing - * mapcount (which luckily provides a barrier): once page is unmapped, - * it could be truncated and page->mapping reset to NULL at any moment. - * Note also that we are relying on page_mapping(page) to set mapping - * to &swapper_space when PageSwapCache(page). - */ - if (mapping && !mapping_cap_account_dirty(mapping) && - page_test_and_clear_dirty(page_to_pfn(page), 1)) - set_page_dirty(page); /* * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED * and not charged by memcg for now. -- cgit v1.2.3 From 878ec67b3ac528a2b6d44055f049cdbb9cfda30c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 14 Feb 2013 17:39:08 +0100 Subject: regmap: mmio: add register clock support Some mmio devices have a dedicated interface clock that needs to be enabled to access their registers. This patch optionally enables a clock before accessing registers in the regmap_bus callbacks. I added (devm_)regmap_init_mmio_clk variants of the init functions that have an added clk_id string parameter. This is passed to clk_get to request the clock from the clk framework. Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-mmio.c | 79 ++++++++++++++++++++++++++++++++------- include/linux/regmap.h | 47 ++++++++++++++++++++--- 2 files changed, 107 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index f05fc74dd84a..98745dd77e8c 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -16,6 +16,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -26,6 +27,7 @@ struct regmap_mmio_context { void __iomem *regs; unsigned val_bytes; + struct clk *clk; }; static int regmap_mmio_gather_write(void *context, @@ -34,9 +36,16 @@ static int regmap_mmio_gather_write(void *context, { struct regmap_mmio_context *ctx = context; u32 offset; + int ret; BUG_ON(reg_size != 4); + if (ctx->clk) { + ret = clk_enable(ctx->clk); + if (ret < 0) + return ret; + } + offset = *(u32 *)reg; while (val_size) { @@ -64,6 +73,9 @@ static int regmap_mmio_gather_write(void *context, offset += ctx->val_bytes; } + if (ctx->clk) + clk_disable(ctx->clk); + return 0; } @@ -80,9 +92,16 @@ static int regmap_mmio_read(void *context, { struct regmap_mmio_context *ctx = context; u32 offset; + int ret; BUG_ON(reg_size != 4); + if (ctx->clk) { + ret = clk_enable(ctx->clk); + if (ret < 0) + return ret; + } + offset = *(u32 *)reg; while (val_size) { @@ -110,11 +129,20 @@ static int regmap_mmio_read(void *context, offset += ctx->val_bytes; } + if (ctx->clk) + clk_disable(ctx->clk); + return 0; } static void regmap_mmio_free_context(void *context) { + struct regmap_mmio_context *ctx = context; + + if (ctx->clk) { + clk_unprepare(ctx->clk); + clk_put(ctx->clk); + } kfree(context); } @@ -128,11 +156,14 @@ static struct regmap_bus regmap_mmio = { .val_format_endian_default = REGMAP_ENDIAN_NATIVE, }; -static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs, +static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev, + const char *clk_id, + void __iomem *regs, const struct regmap_config *config) { struct regmap_mmio_context *ctx; int min_stride; + int ret; if (config->reg_bits != 32) return ERR_PTR(-EINVAL); @@ -179,37 +210,59 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs, ctx->regs = regs; ctx->val_bytes = config->val_bits / 8; + if (clk_id == NULL) + return ctx; + + ctx->clk = clk_get(dev, clk_id); + if (IS_ERR(ctx->clk)) { + ret = PTR_ERR(ctx->clk); + goto err_free; + } + + ret = clk_prepare(ctx->clk); + if (ret < 0) { + clk_put(ctx->clk); + goto err_free; + } + return ctx; + +err_free: + kfree(ctx); + + return ERR_PTR(ret); } /** - * regmap_init_mmio(): Initialise register map + * regmap_init_mmio_clk(): Initialise register map with register clock * * @dev: Device that will be interacted with + * @clk_id: register clock consumer ID * @regs: Pointer to memory-mapped IO region * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) +struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config) { struct regmap_mmio_context *ctx; - ctx = regmap_mmio_gen_context(regs, config); + ctx = regmap_mmio_gen_context(dev, clk_id, regs, config); if (IS_ERR(ctx)) return ERR_CAST(ctx); return regmap_init(dev, ®map_mmio, ctx, config); } -EXPORT_SYMBOL_GPL(regmap_init_mmio); +EXPORT_SYMBOL_GPL(regmap_init_mmio_clk); /** - * devm_regmap_init_mmio(): Initialise managed register map + * devm_regmap_init_mmio_clk(): Initialise managed register map with clock * * @dev: Device that will be interacted with + * @clk_id: register clock consumer ID * @regs: Pointer to memory-mapped IO region * @config: Configuration for register map * @@ -217,18 +270,18 @@ EXPORT_SYMBOL_GPL(regmap_init_mmio); * to a struct regmap. The regmap will be automatically freed by the * device management code. */ -struct regmap *devm_regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) +struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config) { struct regmap_mmio_context *ctx; - ctx = regmap_mmio_gen_context(regs, config); + ctx = regmap_mmio_gen_context(dev, clk_id, regs, config); if (IS_ERR(ctx)) return ERR_CAST(ctx); return devm_regmap_init(dev, ®map_mmio, ctx, config); } -EXPORT_SYMBOL_GPL(devm_regmap_init_mmio); +EXPORT_SYMBOL_GPL(devm_regmap_init_mmio_clk); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index b7e95bf942c9..f0480a3297e4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -285,9 +285,9 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config); struct regmap *regmap_init_spi(struct spi_device *dev, const struct regmap_config *config); -struct regmap *regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config); +struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config); struct regmap *devm_regmap_init(struct device *dev, const struct regmap_bus *bus, @@ -297,9 +297,44 @@ struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config); struct regmap *devm_regmap_init_spi(struct spi_device *dev, const struct regmap_config *config); -struct regmap *devm_regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config); +struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config); + +/** + * regmap_init_mmio(): Initialise register map + * + * @dev: Device that will be interacted with + * @regs: Pointer to memory-mapped IO region + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +static inline struct regmap *regmap_init_mmio(struct device *dev, + void __iomem *regs, + const struct regmap_config *config) +{ + return regmap_init_mmio_clk(dev, NULL, regs, config); +} + +/** + * devm_regmap_init_mmio(): Initialise managed register map + * + * @dev: Device that will be interacted with + * @regs: Pointer to memory-mapped IO region + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +static inline struct regmap *devm_regmap_init_mmio(struct device *dev, + void __iomem *regs, + const struct regmap_config *config) +{ + return devm_regmap_init_mmio_clk(dev, NULL, regs, config); +} void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, -- cgit v1.2.3 From 56cc6cb707fedd897d391f6e50e3b56d62d6683f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Feb 2013 23:26:56 +0100 Subject: ARM defconfigs: add missing inclusions of linux/platform_device.h Patch 16559ae "kgdb: remove #include from kgdb.h" removed an implicit inclusion of linux/platform_device.h In a number of places. This adds back explicit inclusions in a few more places I found. Signed-off-by: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Florian Tobias Schandinat Signed-off-by: Greg Kroah-Hartman --- arch/arm/plat-samsung/include/plat/adc.h | 1 + drivers/video/clps711xfb.c | 1 + include/linux/platform_data/s3c-hsotg.h | 2 ++ 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/plat-samsung/include/plat/adc.h b/arch/arm/plat-samsung/include/plat/adc.h index b258a08de591..2fc89315553f 100644 --- a/arch/arm/plat-samsung/include/plat/adc.h +++ b/arch/arm/plat-samsung/include/plat/adc.h @@ -15,6 +15,7 @@ #define __ASM_PLAT_ADC_H __FILE__ struct s3c_adc_client; +struct platform_device; extern int s3c_adc_start(struct s3c_adc_client *client, unsigned int channel, unsigned int nr_samples); diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c index 5a7af0deced2..f00980607b8f 100644 --- a/drivers/video/clps711xfb.c +++ b/drivers/video/clps711xfb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/platform_data/s3c-hsotg.h b/include/linux/platform_data/s3c-hsotg.h index 8b79e0967f9c..3f1cbf95ec3b 100644 --- a/include/linux/platform_data/s3c-hsotg.h +++ b/include/linux/platform_data/s3c-hsotg.h @@ -15,6 +15,8 @@ #ifndef __LINUX_USB_S3C_HSOTG_H #define __LINUX_USB_S3C_HSOTG_H +struct platform_device; + enum s3c_hsotg_dmamode { S3C_HSOTG_DMA_NONE, /* do not use DMA at-all */ S3C_HSOTG_DMA_ONLY, /* always use DMA */ -- cgit v1.2.3 From 7bf9b9a0f0372d45b581f00173505fb76a9c5d23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Dec 2012 18:45:41 +0100 Subject: wireless: define operating mode action frame Define the action frame format, the VHT category and its action types and the field format and EID for operating mode notifications. The frame may be used outside of VHT context as well, so don't include "VHT" in the names. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7e8a498efe6d..67c1a6c45837 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -714,6 +714,30 @@ enum ieee80211_ht_chanwidth_values { IEEE80211_HT_CHANWIDTH_ANY = 1, }; +/** + * enum ieee80211_opmode_bits - VHT operating mode field bits + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width + * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width + * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask + * (the NSS value is the value of this field + 1) + * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift + * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU + * using a beamforming steering matrix + */ +enum ieee80211_vht_opmode_bits { + IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK = 3, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ = 0, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ = 1, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ = 2, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ = 3, + IEEE80211_OPMODE_NOTIF_RX_NSS_MASK = 0x70, + IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, + IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF = 0x80, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 struct ieee80211_mgmt { @@ -844,6 +868,10 @@ struct ieee80211_mgmt { __le16 capability; u8 variable[0]; } __packed tdls_discover_resp; + struct { + u8 action_code; + u8 operating_mode; + } __packed vht_opmode_notif; } u; } __packed action; } u; @@ -1598,6 +1626,7 @@ enum ieee80211_eid { WLAN_EID_VHT_CAPABILITY = 191, WLAN_EID_VHT_OPERATION = 192, + WLAN_EID_OPMODE_NOTIF = 199, /* 802.11ad */ WLAN_EID_NON_TX_BSSID_CAP = 83, @@ -1652,6 +1681,7 @@ enum ieee80211_category { WLAN_CATEGORY_WMM = 17, WLAN_CATEGORY_FST = 18, WLAN_CATEGORY_UNPROT_DMG = 20, + WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -1677,6 +1707,13 @@ enum ieee80211_ht_actioncode { WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7, }; +/* VHT action codes */ +enum ieee80211_vht_actioncode { + WLAN_VHT_ACTION_COMPRESSED_BF = 0, + WLAN_VHT_ACTION_GROUPID_MGMT = 1, + WLAN_VHT_ACTION_OPMODE_NOTIF = 2, +}; + /* Self Protected Action codes */ enum ieee80211_self_protected_actioncode { WLAN_SP_RESERVED = 0, -- cgit v1.2.3 From 0af83d3df5863224336a18c24a14fda542b712f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Dec 2012 18:55:36 +0100 Subject: mac80211: handle VHT operating mode notification Handle the operating mode notification action frame. When the supported streams or the bandwidth change let the driver and rate control algorithm know. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/mac80211.h | 3 ++ net/mac80211/ht.c | 4 ++ net/mac80211/ieee80211_i.h | 3 ++ net/mac80211/rx.c | 30 +++++++++++++++ net/mac80211/sta_info.h | 4 ++ net/mac80211/vht.c | 93 +++++++++++++++++++++++++++++++++++++++++----- 7 files changed, 128 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 67c1a6c45837..12b5996533ec 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1301,6 +1301,7 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 0x00000002 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 +#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C #define IEEE80211_VHT_CAP_RXLDPC 0x00000010 #define IEEE80211_VHT_CAP_SHORT_GI_80 0x00000020 #define IEEE80211_VHT_CAP_SHORT_GI_160 0x00000040 diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a608ab9879b4..b7fb311e83f4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2118,11 +2118,14 @@ enum ieee80211_frame_release_type { * @IEEE80211_RC_SUPP_RATES_CHANGED: The supported rate set of this peer * changed (in IBSS mode) due to discovering more information about * the peer. + * @IEEE80211_RC_NSS_CHANGED: N_SS (number of spatial streams) was changed + * by the peer */ enum ieee80211_rate_control_changed { IEEE80211_RC_BW_CHANGED = BIT(0), IEEE80211_RC_SMPS_CHANGED = BIT(1), IEEE80211_RC_SUPP_RATES_CHANGED = BIT(2), + IEEE80211_RC_NSS_CHANGED = BIT(3), }; /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a64b4f0d373f..797969bc26e1 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -212,6 +212,10 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, changed = true; sta->sta.bandwidth = bw; + sta->cur_max_bandwidth = + ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + return changed; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3b13af4e6c49..4947c91c6c86 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1433,6 +1433,9 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8a861a50b12f..1617e0bd4ca6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2435,6 +2435,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto invalid; } + break; + case WLAN_CATEGORY_VHT: + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + goto invalid; + + switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + u8 opmode; + + /* verify opmode is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + ieee80211_vht_handle_opmode(rx->sdata, rx->sta, + opmode, status->band); + goto handled; + } + default: + break; + } break; case WLAN_CATEGORY_BACK: if (sdata->vif.type != NL80211_IFTYPE_STATION && diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 03c42f8d39f0..63dfdb5e91da 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -297,6 +297,8 @@ struct sta_ampdu_mlme { * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct + * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, + * taken from HT/VHT capabilities or VHT operating mode notification */ struct sta_info { /* General information, mostly static */ @@ -398,6 +400,8 @@ struct sta_info { } debugfs; #endif + enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; + unsigned int lost_packets; unsigned int beacon_loss_count; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 67436e3efbbd..0951f74e7ff5 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -10,6 +10,7 @@ #include #include #include "ieee80211_i.h" +#include "rate.h" void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, @@ -39,6 +40,15 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + default: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + } + sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } @@ -46,12 +56,13 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 cap = sta->sta.vht_cap.cap; + enum ieee80211_sta_rx_bandwidth bw; - if (!sta->sta.vht_cap.vht_supported) - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + if (!sta->sta.vht_cap.vht_supported) { + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - - /* TODO: handle VHT opmode notification data */ + goto check_max; + } switch (sdata->vif.bss_conf.chandef.width) { default: @@ -60,19 +71,31 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: - return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; + break; case NL80211_CHAN_WIDTH_160: - if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) - return IEEE80211_STA_RX_BW_160; + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } /* fall through */ case NL80211_CHAN_WIDTH_80P80: - if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) - return IEEE80211_STA_RX_BW_160; + if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { + bw = IEEE80211_STA_RX_BW_160; + break; + } /* fall through */ case NL80211_CHAN_WIDTH_80: - return IEEE80211_STA_RX_BW_80; + bw = IEEE80211_STA_RX_BW_80; } + + check_max: + if (bw > sta->cur_max_bandwidth) + bw = sta->cur_max_bandwidth; + return bw; } void ieee80211_sta_set_rx_nss(struct sta_info *sta) @@ -115,3 +138,53 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) ht_rx_nss = max(ht_rx_nss, vht_rx_nss); sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss); } + +void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 opmode, + enum ieee80211_band band) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum ieee80211_sta_rx_bandwidth new_bw; + u32 changed = 0; + u8 nss; + + sband = local->hw.wiphy->bands[band]; + + /* ignore - no support for BF yet */ + if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) + return; + + nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; + nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; + nss += 1; + + if (sta->sta.rx_nss != nss) { + sta->sta.rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + break; + case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + break; + } + + new_bw = ieee80211_sta_cur_vht_bw(sta); + if (new_bw != sta->sta.bandwidth) { + sta->sta.bandwidth = new_bw; + changed |= IEEE80211_RC_NSS_CHANGED; + } + + if (changed) + rate_control_rate_update(local, sband, sta, changed); +} -- cgit v1.2.3 From 4a3cb702b05868f67c4ee3da3380461c5b90b4ca Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 12 Feb 2013 16:43:19 +0100 Subject: mac80211: constify IE parsing Make all the parsed IE pointers const, and propagate the change to all the users etc. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/ht.c | 2 +- net/mac80211/ieee80211_i.h | 87 +++++++++++++++++++++++---------------------- net/mac80211/mesh.h | 11 +++--- net/mac80211/mesh_hwmp.c | 42 ++++++++++++---------- net/mac80211/mesh_pathtbl.c | 11 +++--- net/mac80211/mlme.c | 15 ++++---- net/mac80211/util.c | 4 +-- net/mac80211/vht.c | 9 ++--- 9 files changed, 96 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 12b5996533ec..e085fcf52b26 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2152,7 +2152,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) * @tim_len: length of the TIM IE * @aid: the AID to look for */ -static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim, +static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) { u8 mask; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 797969bc26e1..b84147ac5b4c 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -94,7 +94,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, + const struct ieee80211_ht_cap *ht_cap_ie, struct sta_info *sta) { struct ieee80211_sta_ht_cap ht_cap; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d1074442f1b5..d702f7dc321b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1163,41 +1163,41 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { - u8 *ie_start; + const u8 *ie_start; size_t total_len; /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_operation *ht_operation; - struct ieee80211_vht_cap *vht_cap_elem; - struct ieee80211_vht_operation *vht_operation; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peering; - __le16 *awake_window; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - struct ieee80211_channel_sw_ie *ch_switch_ie; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; - u8 *opmode_notif; + const u8 *ssid; + const u8 *supp_rates; + const u8 *fh_params; + const u8 *ds_params; + const u8 *cf_params; + const struct ieee80211_tim_ie *tim; + const u8 *ibss_params; + const u8 *challenge; + const u8 *rsn; + const u8 *erp_info; + const u8 *ext_supp_rates; + const u8 *wmm_info; + const u8 *wmm_param; + const struct ieee80211_ht_cap *ht_cap_elem; + const struct ieee80211_ht_operation *ht_operation; + const struct ieee80211_vht_cap *vht_cap_elem; + const struct ieee80211_vht_operation *vht_operation; + const struct ieee80211_meshconf_ie *mesh_config; + const u8 *mesh_id; + const u8 *peering; + const __le16 *awake_window; + const u8 *preq; + const u8 *prep; + const u8 *perr; + const struct ieee80211_rann_ie *rann; + const struct ieee80211_channel_sw_ie *ch_switch_ie; + const u8 *country_elem; + const u8 *pwr_constr_elem; + const u8 *quiet_elem; /* first quite element */ + const u8 *timeout_int; + const u8 *opmode_notif; /* length of them, respectively */ u8 ssid_len; @@ -1276,10 +1276,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp); +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); @@ -1387,7 +1387,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct ieee80211_ht_cap *ht_cap_ie, + const struct ieee80211_ht_cap *ht_cap_ie, struct sta_info *sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, @@ -1428,10 +1428,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); /* VHT */ -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct sta_info *sta); +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -1555,7 +1556,7 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *extra, size_t extra_len, const u8 *bssid, const u8 *da, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, @@ -1606,7 +1607,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, /* channel management */ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); int __must_check diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7ad035f0cacc..a1bad310f2e9 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -265,8 +265,8 @@ int mesh_nexthop_lookup(struct sk_buff *skb, int mesh_nexthop_resolve(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); -struct mesh_path *mesh_path_lookup(u8 *dst, - struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(const u8 *dst, + struct ieee80211_sub_if_data *sdata); struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata); int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); @@ -276,7 +276,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); +int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); @@ -301,8 +301,9 @@ void mesh_sta_cleanup(struct sta_info *sta); void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, - const u8 *ra, struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra, + struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f0dd8742ed42..585c1e26cca8 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -30,14 +30,14 @@ static void mesh_queue_preq(struct mesh_path *, u8); -static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; return get_unaligned_le32(preq_elem + offset); } -static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae) { if (ae) offset += 6; @@ -102,10 +102,13 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, - __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, __le32 preq_id, - struct ieee80211_sub_if_data *sdata) + const u8 *orig_addr, __le32 orig_sn, + u8 target_flags, const u8 *target, + __le32 target_sn, const u8 *da, + u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, + __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -235,7 +238,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. */ -int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, +int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn, __le16 target_rcode, const u8 *ra, struct ieee80211_sub_if_data *sdata) { @@ -369,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * path routing information is updated. */ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie, enum mpath_frame_type action) + struct ieee80211_mgmt *mgmt, + const u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; - u8 *orig_addr, *ta; + const u8 *orig_addr, *ta; u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; @@ -511,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *preq_elem, u32 metric) + const u8 *preq_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath = NULL; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; u32 orig_sn, target_sn, lifetime, orig_metric; @@ -648,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath) static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *prep_elem, u32 metric) + const u8 *prep_elem, u32 metric) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *target_addr, *orig_addr; + const u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; @@ -711,12 +714,13 @@ fail: } static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, u8 *perr_elem) + struct ieee80211_mgmt *mgmt, + const u8 *perr_elem) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 ttl; - u8 *ta, *target_addr; + const u8 *ta, *target_addr; u32 target_sn; u16 target_rcode; @@ -758,15 +762,15 @@ endperr: } static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - struct ieee80211_rann_ie *rann) + struct ieee80211_mgmt *mgmt, + const struct ieee80211_rann_ie *rann) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct mesh_path *mpath; u8 ttl, flags, hopcount; - u8 *orig_addr; + const u8 *orig_addr; u32 orig_sn, metric, metric_txsta, interval; bool root_is_gate; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index d5786c3eaee2..2ce4c4023a97 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -181,7 +181,7 @@ errcopy: return -ENOMEM; } -static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, +static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ @@ -326,8 +326,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, } -static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, - struct ieee80211_sub_if_data *sdata) +static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; @@ -359,7 +359,8 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst, * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +struct mesh_path *mesh_path_lookup(const u8 *dst, + struct ieee80211_sub_if_data *sdata) { return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata); } @@ -494,7 +495,7 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) +int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 05b229e3b226..7a8cd789e487 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1041,10 +1041,10 @@ static void ieee80211_chswitch_timer(unsigned long data) ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } -void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel_sw_ie *sw_elem, - struct ieee80211_bss *bss, - u64 timestamp) +void +ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss, u64 timestamp) { struct cfg80211_bss *cbss = container_of((void *)bss, struct cfg80211_bss, priv); @@ -1479,13 +1479,14 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work) /* MLME */ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u8 *wmm_param, size_t wmm_param_len) + const u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; - u8 *pos, uapsd_queues = 0; + const u8 *pos; + u8 uapsd_queues = 0; if (!local->ops->conf_tx) return false; @@ -2670,7 +2671,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; if (elems->tim && !elems->parse_error) { - struct ieee80211_tim_ie *tim_ie = elems->tim; + const struct ieee80211_tim_ie *tim_ie = elems->tim; sdata->u.mgd.dtim_period = tim_ie->dtim_period; } } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e24ff38606a9..0f38f43ac62e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1035,7 +1035,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, - u8 *extra, size_t extra_len, const u8 *da, + const u8 *extra, size_t extra_len, const u8 *da, const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags) { @@ -1947,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, } void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, - struct ieee80211_ht_operation *ht_oper, + const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { enum nl80211_channel_type channel_type; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a9549fcc5a04..a2c2258bc84e 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -13,10 +13,11 @@ #include "rate.h" -void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct ieee80211_vht_cap *vht_cap_ie, - struct sta_info *sta) +void +ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_vht_cap *vht_cap_ie, + struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; -- cgit v1.2.3 From c6f9d6c3bdeb337809d667ef2a41597229a1ce57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Feb 2013 14:27:08 +0100 Subject: mac80211: advertise operating mode notification capability Use the new extended capabilities advertising to advertise the fact that operating mode notification is supported. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ net/mac80211/main.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e085fcf52b26..7e24fe0cfbcd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1776,6 +1776,8 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA5_TDLS_ENABLED BIT(5) #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) +#define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 9cdbc774cfd7..035344bc6b9c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -501,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const u8 extended_capabilities[] = { + 0, 0, 0, 0, 0, 0, 0, + WLAN_EXT_CAPA8_OPMODE_NOTIF, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -557,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; + wiphy->extended_capabilities = extended_capabilities; + wiphy->extended_capabilities_mask = extended_capabilities; + wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities); + if (ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; -- cgit v1.2.3 From ea27c396174d5a4576853cbe7aeabeb9f7cba6e1 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Tue, 12 Feb 2013 01:10:57 +0800 Subject: pinctrl: generic: rename input schmitt disable Rename PIN_CONFIG_INPUT_SCHMITT_DISABLE to PIN_CONFIG_INPUT_SCHMITT_ENABLE. It's used to make it more generialize. Signed-off-by: Haojian Zhuang Acked-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf-generic.c | 2 +- include/linux/pinctrl/pinconf-generic.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index e5948f8172af..06c304ac6f7d 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -41,7 +41,7 @@ struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL), - PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_DISABLE, "input schmitt disabled", NULL), + PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL), PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL), PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"), PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"), diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 3e7909aa5c03..72474e18f1e0 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -48,7 +48,9 @@ * argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as * argument. The argument is in mA. - * @PIN_CONFIG_INPUT_SCHMITT_DISABLE: disable schmitt-trigger mode on the pin. + * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. + * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, + * schmitt-trigger mode is disabled. * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in * schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis, * the threshold value is given on a custom format as argument when @@ -82,7 +84,7 @@ enum pin_config_param { PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, PIN_CONFIG_DRIVE_STRENGTH, - PIN_CONFIG_INPUT_SCHMITT_DISABLE, + PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_DEBOUNCE, PIN_CONFIG_POWER_SOURCE, -- cgit v1.2.3 From 73fb847a44224d5708550e4be7baba9da75e00af Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:45 +0300 Subject: SUNRPC: introduce cache_detail->cache_request callback This callback will allow to simplify upcalls in further patches in this series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 3 ++- fs/nfsd/export.c | 6 ++++-- fs/nfsd/nfs4idmap.c | 6 ++++-- include/linux/sunrpc/cache.h | 4 ++++ net/sunrpc/auth_gss/svcauth_gss.c | 3 ++- net/sunrpc/svcauth_unix.c | 6 ++++-- 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 9cbc98a4159d..55bc5d11457c 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); return ret; } @@ -359,6 +359,7 @@ static struct cache_detail nfs_dns_resolve_template = { .name = "dns_resolve", .cache_put = nfs_dns_ent_put, .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, .cache_parse = nfs_dns_parse, .cache_show = nfs_dns_show, .match = nfs_dns_match, diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8e9df45b3d3c..0e16c7fa6800 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -69,7 +69,7 @@ static void expkey_request(struct cache_detail *cd, static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, @@ -246,6 +246,7 @@ static struct cache_detail svc_expkey_cache_template = { .name = "nfsd.fh", .cache_put = expkey_put, .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -340,7 +341,7 @@ static void svc_export_request(struct cache_detail *cd, static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct svc_export *svc_export_update(struct svc_export *new, @@ -714,6 +715,7 @@ static struct cache_detail svc_export_cache_template = { .name = "nfsd.export", .cache_put = svc_export_put, .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a1f10c0a6255..9033dfde1812 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -142,7 +142,7 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, static int idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) { - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); + return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); } static int @@ -193,6 +193,7 @@ static struct cache_detail idtoname_cache_template = { .name = "nfs4.idtoname", .cache_put = ent_put, .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -323,7 +324,7 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, static int nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) { - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); + return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); } static int @@ -366,6 +367,7 @@ static struct cache_detail nametoid_cache_template = { .name = "nfs4.nametoid", .cache_put = ent_put, .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4d616e..4f1c8582053c 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index a5b41e2ac25a..1b0df530b59d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -184,7 +184,7 @@ static void rsi_request(struct cache_detail *cd, static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } @@ -276,6 +276,7 @@ static struct cache_detail rsi_cache_template = { .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index ce34c8e5b8eb..18b8742eaa50 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -159,7 +159,7 @@ static void ip_map_request(struct cache_detail *cd, static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); @@ -472,7 +472,7 @@ static void unix_gid_request(struct cache_detail *cd, static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); + return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); } static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid); @@ -578,6 +578,7 @@ static struct cache_detail unix_gid_cache_template = { .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -875,6 +876,7 @@ static struct cache_detail ip_map_cache_template = { .name = "auth.unix.ip", .cache_put = ip_map_put, .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, -- cgit v1.2.3 From 21cd1254d3402a72927ed744e8ac1a7cf532f1ea Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:55 +0300 Subject: SUNRPC: remove "cache_request" argument in sunrpc_cache_pipe_upcall() function Passing this pointer is redundant since it's stored on cache_detail structure, which is also passed to sunrpc_cache_pipe_upcall () function. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 2 +- include/linux/sunrpc/cache.h | 6 +----- net/sunrpc/cache.c | 10 +++------- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 55bc5d11457c..945527092295 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request); + ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 4f1c8582053c..303399b1ba59 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -161,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 51853d8ed0bc..8ffec5aebeff 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -198,7 +198,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { if (cd->cache_upcall) return cd->cache_upcall(cd, h); - return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -1140,11 +1140,7 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; @@ -1172,7 +1168,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, bp = buf; len = PAGE_SIZE; - cache_request(detail, h, &bp, &len); + detail->cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); -- cgit v1.2.3 From 686855f5d833178e518d79e7912cdb3268a9fa69 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Feb 2013 18:19:58 +0400 Subject: sched: add wait_for_completion_io[_timeout] The only difference between wait_for_completion[_timeout]() and wait_for_completion_io[_timeout]() is that the latter calls io_schedule_timeout() instead of schedule_timeout() so that the caller is accounted as waiting for IO, not just sleeping. These functions can be used for correct iowait time accounting when the completion struct is actually used for waiting for IO (e.g. completion of a bio request in the block layer). Signed-off-by: Vladimir Davydov Acked-by: Ingo Molnar Signed-off-by: Jens Axboe --- include/linux/completion.h | 3 +++ kernel/sched/core.c | 57 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..33f0280fd533 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..d6fdcdcbb9b1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3267,7 +3267,8 @@ void complete_all(struct completion *x) EXPORT_SYMBOL(complete_all); static inline long __sched -do_wait_for_common(struct completion *x, long timeout, int state) +do_wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { if (!x->done) { DECLARE_WAITQUEUE(wait, current); @@ -3280,7 +3281,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) } __set_current_state(state); spin_unlock_irq(&x->wait.lock); - timeout = schedule_timeout(timeout); + timeout = action(timeout); spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); @@ -3291,17 +3292,30 @@ do_wait_for_common(struct completion *x, long timeout, int state) return timeout ?: 1; } -static long __sched -wait_for_common(struct completion *x, long timeout, int state) +static inline long __sched +__wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { might_sleep(); spin_lock_irq(&x->wait.lock); - timeout = do_wait_for_common(x, timeout, state); + timeout = do_wait_for_common(x, action, timeout, state); spin_unlock_irq(&x->wait.lock); return timeout; } +static long __sched +wait_for_common(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, schedule_timeout, timeout, state); +} + +static long __sched +wait_for_common_io(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, io_schedule_timeout, timeout, state); +} + /** * wait_for_completion: - waits for completion of a task * @x: holds the state of this particular completion @@ -3337,6 +3351,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) } EXPORT_SYMBOL(wait_for_completion_timeout); +/** + * wait_for_completion_io: - waits for completion of a task + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It is NOT + * interruptible and there is no timeout. The caller is accounted as waiting + * for IO. + */ +void __sched wait_for_completion_io(struct completion *x) +{ + wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io); + +/** + * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. The timeout is in jiffies. It is not + * interruptible. The caller is accounted as waiting for IO. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. + */ +unsigned long __sched +wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) +{ + return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io_timeout); + /** * wait_for_completion_interruptible: - waits for completion of a task (w/intr) * @x: holds the state of this particular completion -- cgit v1.2.3 From f25cc71e634edcf8a15bc60a48f2b5f3ec9fbb1d Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 13 Feb 2013 08:40:16 -0700 Subject: lockd: nlmclnt_reclaim(): avoid stack overflow Even though nlmclnt_reclaim() is only one call into the stack frame, 928 bytes on the stack seems like a lot. Recode to dynamically allocate the request structure once from within the reclaimer task, then pass this pointer into nlmclnt_reclaim() for reuse on subsequent calls. smatch analysis: fs/lockd/clntproc.c:620 nlmclnt_reclaim() warn: 'reqst' puts 928 bytes on stack Also remove redundant assignment of 0 after memset. Cc: Trond Myklebust Signed-off-by: Tim Gardner Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 12 +++++++++++- fs/lockd/clntproc.c | 6 ++---- include/linux/lockd/lockd.h | 3 ++- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 4885b53e56bd..6cd673d34fb9 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -220,10 +220,19 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; + struct nlm_rqst *req; struct file_lock *fl, *next; u32 nsmstate; struct net *net = host->net; + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + printk(KERN_ERR "lockd: reclaimer unable to alloc memory." + " Locks for %s won't be reclaimed!\n", + host->h_name); + return 0; + } + allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -253,7 +262,7 @@ restart: */ if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) != 0) + if (nlmclnt_reclaim(host, fl, req) != 0) continue; list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { @@ -279,5 +288,6 @@ restart: /* Release host handle after use */ nlmclnt_release_host(host); lockd_down(net); + kfree(req); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 54f9e6ce0430..b43114c4332a 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -615,17 +615,15 @@ out_unlock: * RECLAIM: Try to reclaim a lock */ int -nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl, + struct nlm_rqst *req) { - struct nlm_rqst reqst, *req; int status; - req = &reqst; memset(req, 0, sizeof(*req)); locks_init_lock(&req->a_args.lock.fl); locks_init_lock(&req->a_res.lock.fl); req->a_host = host; - req->a_flags = 0; /* Set up the argument struct */ nlmclnt_setlockargs(req, fl); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f5a051a79273..a395f1e7998f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* -- cgit v1.2.3 From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 09:44:49 +0000 Subject: net: Add skb_unclone() helper function. This function will be used in next GRE_GSO patch. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet --- drivers/net/ppp/ppp_generic.c | 3 +-- include/linux/skbuff.h | 10 ++++++++++ net/ipv4/ah4.c | 3 +-- net/ipv4/ip_fragment.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 3 +-- net/ipv6/ah6.c | 3 +-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- net/sched/act_ipt.c | 6 ++---- net/sched/act_pedit.c | 3 +-- 13 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0b2706abe3e3..4fd754e74eb2 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto err; *skb_push(skb, 2) = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9da99520ccd5..ca6ee7d93edb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb) (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(pri & __GFP_WAIT); + + if (skb_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b5..2e7f1948216f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1211613c6c34..b6d30acb600c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6182d90e97b0..fd0cea114b5d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..1f12c8b45864 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... */ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2c..1162ace30838 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c674f158efa8..b89a8c3186cd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index bab2c270f292..e354743ed426 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..93c41a81c4c3 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0fb9e3f567e6..e0f6de64afec 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 0c3faddf3f2c..7ed78c9e505c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); -- cgit v1.2.3 From 05e8ef4ab2d8087d360e814d14da20b9f7fb2283 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 09:44:55 +0000 Subject: net: factor out skb_mac_gso_segment() from skb_gso_segment() This function will be used in next GRE_GSO patch. This patch does not change any functionality. It only exports skb_mac_gso_segment() function. [ Use skb_reset_mac_len() -DaveM ] Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 79 ++++++++++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9deb672d999f..920361bc27e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2671,6 +2671,8 @@ extern void netdev_upper_dev_unlink(struct net_device *dev, extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); +extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/core/dev.c b/net/core/dev.c index f44473696b8b..67deae60214c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2327,37 +2327,20 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; - else - return skb->ip_summed == CHECKSUM_NONE; -} - /** - * __skb_gso_segment - Perform segmentation on skb. + * skb_mac_gso_segment - mac layer segmentation handler. * @skb: buffer to segment * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; - int err; while (type == htons(ETH_P_8021Q)) { + int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) @@ -2368,22 +2351,14 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, vlan_depth += VLAN_HLEN; } - skb_reset_mac_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb_needs_check(skb, tx_path))) { - skb_warn_bad_offload(skb); - - if (skb_header_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) - return ERR_PTR(err); - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + int err; + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -2401,6 +2376,48 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return segs; } +EXPORT_SYMBOL(skb_mac_gso_segment); + + +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL; + else + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + skb_warn_bad_offload(skb); + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return skb_mac_gso_segment(skb, features); +} EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ -- cgit v1.2.3 From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 14:02:41 +0000 Subject: v4 GRE: Add TCP segmentation offload for GRE Following patch adds GRE protocol offload handler so that skb_gso_segment() can segment GRE packets. SKB GSO CB is added to keep track of total header length so that skb_segment can push entire header. e.g. in case of GRE, skb_segment need to push inner and outer headers to every segment. New NETIF_F_GRE_GSO feature is added for devices which support HW GRE TSO offload. Currently none of devices support it therefore GRE GSO always fall backs to software GSO. [ Compute pkt_len before ip_local_out() invocation. -DaveM ] Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +- include/linux/skbuff.h | 17 ++++++ net/core/dev.c | 1 + net/core/ethtool.c | 1 + net/core/skbuff.c | 6 +- net/ipv4/af_inet.c | 1 + net/ipv4/gre.c | 118 ++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 82 +++++++++++++++++++++++++--- net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 3 +- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 3 +- 12 files changed, 226 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5ac32123035a..3dd39340430e 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -41,7 +41,7 @@ enum { NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ - NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */ + NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ = NETIF_F_GSO_LAST, @@ -102,6 +102,7 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) +#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca6ee7d93edb..821c7f45d2a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,6 +314,8 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + SKB_GSO_GRE = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) } #endif +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. */ +struct skb_gso_cb { + int mac_offset; +}; +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index 67deae60214c..1cd6297fd34b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); skb_reset_mac_header(skb); skb_reset_mac_len(skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d9d55209db67..3e9b2c3e30f0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c1ad09f8796..2a3ca33c30aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e6e5d8506336..e225a4e5b572 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..7a4c710c4cdd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 00a14b9864ea..a56f1182c176 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,8 +735,33 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } old_iph = ip_hdr(skb); @@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; + iph->id = 0; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { int offset = skb_transport_offset(skb); + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) { + ip_rt_put(rt); + goto tx_error; + } + } + *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, skb->len - offset, @@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. */ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } return mtu; } @@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f0bedb8622f..7a5ba48c2cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6791aac06ea9..39a5e7a9a77f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..8234c1dcdf72 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..cf05cf073c51 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.2.3 From 0db3863add5cb249520b31a4ad36f275a30e7ba6 Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Fri, 15 Feb 2013 14:43:38 -0800 Subject: Input: bma150 - make some defines public and fix some comments Make the constants referring to range and bandwidth public so they can be used when initializing the platform data fields in the platform code. Fix also some comments regarding the unit of measurement to use for the range and bandwidth fields, the values are not actually expected to be in G or HZ, the code in bma150.c just uses the BMA150_RANGE_xxx and BMA150_BW_xxx constants like they are with no translation from actual values in G or HZ. Signed-off-by: Michael Trimarchi Signed-off-by: Antonio Ospite Signed-off-by: Dmitry Torokhov --- drivers/input/misc/bma150.c | 12 ------------ include/linux/bma150.h | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c index e5d18946bb2e..865c2f9d25b9 100644 --- a/drivers/input/misc/bma150.c +++ b/drivers/input/misc/bma150.c @@ -46,18 +46,6 @@ #define BMA150_POLL_MAX 200 #define BMA150_POLL_MIN 0 -#define BMA150_BW_25HZ 0 -#define BMA150_BW_50HZ 1 -#define BMA150_BW_100HZ 2 -#define BMA150_BW_190HZ 3 -#define BMA150_BW_375HZ 4 -#define BMA150_BW_750HZ 5 -#define BMA150_BW_1500HZ 6 - -#define BMA150_RANGE_2G 0 -#define BMA150_RANGE_4G 1 -#define BMA150_RANGE_8G 2 - #define BMA150_MODE_NORMAL 0 #define BMA150_MODE_SLEEP 2 #define BMA150_MODE_WAKE_UP 3 diff --git a/include/linux/bma150.h b/include/linux/bma150.h index 7911fda23bb4..97ade7cdc870 100644 --- a/include/linux/bma150.h +++ b/include/linux/bma150.h @@ -22,6 +22,18 @@ #define BMA150_DRIVER "bma150" +#define BMA150_RANGE_2G 0 +#define BMA150_RANGE_4G 1 +#define BMA150_RANGE_8G 2 + +#define BMA150_BW_25HZ 0 +#define BMA150_BW_50HZ 1 +#define BMA150_BW_100HZ 2 +#define BMA150_BW_190HZ 3 +#define BMA150_BW_375HZ 4 +#define BMA150_BW_750HZ 5 +#define BMA150_BW_1500HZ 6 + struct bma150_cfg { bool any_motion_int; /* Set to enable any-motion interrupt */ bool hg_int; /* Set to enable high-G interrupt */ @@ -34,8 +46,8 @@ struct bma150_cfg { unsigned char lg_hyst; /* Low-G hysterisis */ unsigned char lg_dur; /* Low-G duration */ unsigned char lg_thres; /* Low-G threshold */ - unsigned char range; /* BMA0150_RANGE_xxx (in G) */ - unsigned char bandwidth; /* BMA0150_BW_xxx (in Hz) */ + unsigned char range; /* one of BMA0150_RANGE_xxx */ + unsigned char bandwidth; /* one of BMA0150_BW_xxx */ }; struct bma150_platform_data { -- cgit v1.2.3 From 9eee07d39fa606a191ae65d3c0e12771a80e70ca Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 15 Feb 2013 17:04:12 -0800 Subject: Input: tegra-kbc - require CONFIG_OF, remove platform data Tegra only supports, and always enables, device tree. Remove all ifdefs and runtime checks for DT support from the driver. Platform data is therefore no longer required. Delete the header that defines it, and rework the driver to parse the device tree directly into struct tegra_kbc. Signed-off-by: Stephen Warren Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 2 +- drivers/input/keyboard/tegra-kbc.c | 195 +++++++++++++++++-------------------- include/linux/input/tegra_kbc.h | 62 ------------ 3 files changed, 93 insertions(+), 166 deletions(-) delete mode 100644 include/linux/input/tegra_kbc.h (limited to 'include/linux') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 72377737eec8..992137cf3a64 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -420,7 +420,7 @@ config KEYBOARD_NOMADIK config KEYBOARD_TEGRA tristate "NVIDIA Tegra internal matrix keyboard controller support" - depends on ARCH_TEGRA + depends on ARCH_TEGRA && OF select INPUT_MATRIXKMAP help Say Y here if you want to use a matrix keyboard connected directly diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 4526bdd36022..d89e7d392d1e 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -29,9 +29,16 @@ #include #include #include -#include +#include #include +#define KBC_MAX_GPIO 24 +#define KBC_MAX_KPENT 8 + +#define KBC_MAX_ROW 16 +#define KBC_MAX_COL 8 +#define KBC_MAX_KEY (KBC_MAX_ROW * KBC_MAX_COL) + #define KBC_MAX_DEBOUNCE_CNT 0x3ffu /* KBC row scan time and delay for beginning the row scan. */ @@ -67,10 +74,27 @@ #define KBC_ROW_SHIFT 3 +enum tegra_pin_type { + PIN_CFG_IGNORE, + PIN_CFG_COL, + PIN_CFG_ROW, +}; + +struct tegra_kbc_pin_cfg { + enum tegra_pin_type type; + unsigned char num; +}; + struct tegra_kbc { + struct device *dev; + unsigned int debounce_cnt; + unsigned int repeat_cnt; + struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO]; + const struct matrix_keymap_data *keymap_data; + bool wakeup; void __iomem *mmio; struct input_dev *idev; - unsigned int irq; + int irq; spinlock_t lock; unsigned int repoll_dly; unsigned long cp_dly_jiffies; @@ -78,7 +102,6 @@ struct tegra_kbc { bool use_fn_map; bool use_ghost_filter; bool keypress_caused_wake; - const struct tegra_kbc_platform_data *pdata; unsigned short keycode[KBC_MAX_KEY * 2]; unsigned short current_keys[KBC_MAX_KPENT]; unsigned int num_pressed_keys; @@ -286,12 +309,11 @@ static irqreturn_t tegra_kbc_isr(int irq, void *args) static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter) { - const struct tegra_kbc_platform_data *pdata = kbc->pdata; int i; unsigned int rst_val; /* Either mask all keys or none. */ - rst_val = (filter && !pdata->wakeup) ? ~0 : 0; + rst_val = (filter && !kbc->wakeup) ? ~0 : 0; for (i = 0; i < KBC_MAX_ROW; i++) writel(rst_val, kbc->mmio + KBC_ROW0_MASK_0 + i * 4); @@ -299,7 +321,6 @@ static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter) static void tegra_kbc_config_pins(struct tegra_kbc *kbc) { - const struct tegra_kbc_platform_data *pdata = kbc->pdata; int i; for (i = 0; i < KBC_MAX_GPIO; i++) { @@ -315,13 +336,13 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc) row_cfg &= ~r_mask; col_cfg &= ~c_mask; - switch (pdata->pin_cfg[i].type) { + switch (kbc->pin_cfg[i].type) { case PIN_CFG_ROW: - row_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << r_shft; + row_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << r_shft; break; case PIN_CFG_COL: - col_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << c_shft; + col_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << c_shft; break; case PIN_CFG_IGNORE: @@ -335,7 +356,6 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc) static int tegra_kbc_start(struct tegra_kbc *kbc) { - const struct tegra_kbc_platform_data *pdata = kbc->pdata; unsigned int debounce_cnt; u32 val = 0; @@ -350,10 +370,10 @@ static int tegra_kbc_start(struct tegra_kbc *kbc) tegra_kbc_config_pins(kbc); tegra_kbc_setup_wakekeys(kbc, false); - writel(pdata->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0); + writel(kbc->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0); /* Keyboard debounce count is maximum of 12 bits. */ - debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); + debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); val = KBC_DEBOUNCE_CNT_SHIFT(debounce_cnt); val |= KBC_FIFO_TH_CNT_SHIFT(1); /* set fifo interrupt threshold to 1 */ val |= KBC_CONTROL_FIFO_CNT_INT_EN; /* interrupt on FIFO threshold */ @@ -420,21 +440,20 @@ static void tegra_kbc_close(struct input_dev *dev) return tegra_kbc_stop(kbc); } -static bool -tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata, - struct device *dev, unsigned int *num_rows) +static bool tegra_kbc_check_pin_cfg(const struct tegra_kbc *kbc, + unsigned int *num_rows) { int i; *num_rows = 0; for (i = 0; i < KBC_MAX_GPIO; i++) { - const struct tegra_kbc_pin_cfg *pin_cfg = &pdata->pin_cfg[i]; + const struct tegra_kbc_pin_cfg *pin_cfg = &kbc->pin_cfg[i]; switch (pin_cfg->type) { case PIN_CFG_ROW: if (pin_cfg->num >= KBC_MAX_ROW) { - dev_err(dev, + dev_err(kbc->dev, "pin_cfg[%d]: invalid row number %d\n", i, pin_cfg->num); return false; @@ -444,7 +463,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata, case PIN_CFG_COL: if (pin_cfg->num >= KBC_MAX_COL) { - dev_err(dev, + dev_err(kbc->dev, "pin_cfg[%d]: invalid column number %d\n", i, pin_cfg->num); return false; @@ -455,7 +474,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata, break; default: - dev_err(dev, + dev_err(kbc->dev, "pin_cfg[%d]: invalid entry type %d\n", pin_cfg->type, pin_cfg->num); return false; @@ -465,12 +484,9 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata, return true; } -#ifdef CONFIG_OF -static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata( - struct platform_device *pdev) +static int tegra_kbc_parse_dt(struct tegra_kbc *kbc) { - struct tegra_kbc_platform_data *pdata; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = kbc->dev->of_node; u32 prop; int i; u32 num_rows = 0; @@ -480,109 +496,96 @@ static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata( int proplen; int ret; - if (!np) { - dev_err(&pdev->dev, "device tree data is missing\n"); - return ERR_PTR(-ENOENT); - } - - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return ERR_PTR(-ENOMEM); - if (!of_property_read_u32(np, "nvidia,debounce-delay-ms", &prop)) - pdata->debounce_cnt = prop; + kbc->debounce_cnt = prop; if (!of_property_read_u32(np, "nvidia,repeat-delay-ms", &prop)) - pdata->repeat_cnt = prop; + kbc->repeat_cnt = prop; if (of_find_property(np, "nvidia,needs-ghost-filter", NULL)) - pdata->use_ghost_filter = true; + kbc->use_ghost_filter = true; if (of_find_property(np, "nvidia,wakeup-source", NULL)) - pdata->wakeup = true; + kbc->wakeup = true; if (!of_get_property(np, "nvidia,kbc-row-pins", &proplen)) { - dev_err(&pdev->dev, "property nvidia,kbc-row-pins not found\n"); - return ERR_PTR(-ENOENT); + dev_err(kbc->dev, "property nvidia,kbc-row-pins not found\n"); + return -ENOENT; } num_rows = proplen / sizeof(u32); if (!of_get_property(np, "nvidia,kbc-col-pins", &proplen)) { - dev_err(&pdev->dev, "property nvidia,kbc-col-pins not found\n"); - return ERR_PTR(-ENOENT); + dev_err(kbc->dev, "property nvidia,kbc-col-pins not found\n"); + return -ENOENT; } num_cols = proplen / sizeof(u32); if (!of_get_property(np, "linux,keymap", &proplen)) { - dev_err(&pdev->dev, "property linux,keymap not found\n"); - return ERR_PTR(-ENOENT); + dev_err(kbc->dev, "property linux,keymap not found\n"); + return -ENOENT; } if (!num_rows || !num_cols || ((num_rows + num_cols) > KBC_MAX_GPIO)) { - dev_err(&pdev->dev, + dev_err(kbc->dev, "keypad rows/columns not porperly specified\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* Set all pins as non-configured */ for (i = 0; i < KBC_MAX_GPIO; i++) - pdata->pin_cfg[i].type = PIN_CFG_IGNORE; + kbc->pin_cfg[i].type = PIN_CFG_IGNORE; ret = of_property_read_u32_array(np, "nvidia,kbc-row-pins", rows_cfg, num_rows); if (ret < 0) { - dev_err(&pdev->dev, "Rows configurations are not proper\n"); - return ERR_PTR(-EINVAL); + dev_err(kbc->dev, "Rows configurations are not proper\n"); + return -EINVAL; } ret = of_property_read_u32_array(np, "nvidia,kbc-col-pins", cols_cfg, num_cols); if (ret < 0) { - dev_err(&pdev->dev, "Cols configurations are not proper\n"); - return ERR_PTR(-EINVAL); + dev_err(kbc->dev, "Cols configurations are not proper\n"); + return -EINVAL; } for (i = 0; i < num_rows; i++) { - pdata->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW; - pdata->pin_cfg[rows_cfg[i]].num = i; + kbc->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW; + kbc->pin_cfg[rows_cfg[i]].num = i; } for (i = 0; i < num_cols; i++) { - pdata->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL; - pdata->pin_cfg[cols_cfg[i]].num = i; + kbc->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL; + kbc->pin_cfg[cols_cfg[i]].num = i; } - return pdata; -} -#else -static inline struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata( - struct platform_device *pdev) -{ - dev_err(&pdev->dev, "platform data is missing\n"); - return ERR_PTR(-EINVAL); + return 0; } -#endif static int tegra_kbc_probe(struct platform_device *pdev) { - const struct tegra_kbc_platform_data *pdata = pdev->dev.platform_data; struct tegra_kbc *kbc; - struct input_dev *input_dev; struct resource *res; - int irq; int err; int num_rows = 0; unsigned int debounce_cnt; unsigned int scan_time_rows; unsigned int keymap_rows = KBC_MAX_KEY; - if (!pdata) - pdata = tegra_kbc_dt_parse_pdata(pdev); + kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL); + if (!kbc) { + dev_err(&pdev->dev, "failed to alloc memory for kbc\n"); + return -ENOMEM; + } + + kbc->dev = &pdev->dev; + spin_lock_init(&kbc->lock); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); + err = tegra_kbc_parse_dt(kbc); + if (err) + return err; - if (!tegra_kbc_check_pin_cfg(pdata, &pdev->dev, &num_rows)) + if (!tegra_kbc_check_pin_cfg(kbc, &num_rows)) return -EINVAL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -591,28 +594,18 @@ static int tegra_kbc_probe(struct platform_device *pdev) return -ENXIO; } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { + kbc->irq = platform_get_irq(pdev, 0); + if (kbc->irq < 0) { dev_err(&pdev->dev, "failed to get keyboard IRQ\n"); return -ENXIO; } - kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL); - if (!kbc) { - dev_err(&pdev->dev, "failed to alloc memory for kbc\n"); - return -ENOMEM; - } - - input_dev = devm_input_allocate_device(&pdev->dev); - if (!input_dev) { + kbc->idev = devm_input_allocate_device(&pdev->dev); + if (!kbc->idev) { dev_err(&pdev->dev, "failed to allocate input device\n"); return -ENOMEM; } - kbc->pdata = pdata; - kbc->idev = input_dev; - kbc->irq = irq; - spin_lock_init(&kbc->lock); setup_timer(&kbc->timer, tegra_kbc_keypress_timer, (unsigned long)kbc); kbc->mmio = devm_request_and_ioremap(&pdev->dev, res); @@ -633,36 +626,32 @@ static int tegra_kbc_probe(struct platform_device *pdev) * the rows. There is an additional delay before the row scanning * starts. The repoll delay is computed in milliseconds. */ - debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); + debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows; - kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt; + kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + kbc->repeat_cnt; kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS); - kbc->wakeup_key = pdata->wakeup_key; - kbc->use_fn_map = pdata->use_fn_map; - kbc->use_ghost_filter = pdata->use_ghost_filter; - - input_dev->name = pdev->name; - input_dev->id.bustype = BUS_HOST; - input_dev->dev.parent = &pdev->dev; - input_dev->open = tegra_kbc_open; - input_dev->close = tegra_kbc_close; + kbc->idev->name = pdev->name; + kbc->idev->id.bustype = BUS_HOST; + kbc->idev->dev.parent = &pdev->dev; + kbc->idev->open = tegra_kbc_open; + kbc->idev->close = tegra_kbc_close; - if (pdata->keymap_data && pdata->use_fn_map) + if (kbc->keymap_data && kbc->use_fn_map) keymap_rows *= 2; - err = matrix_keypad_build_keymap(pdata->keymap_data, NULL, + err = matrix_keypad_build_keymap(kbc->keymap_data, NULL, keymap_rows, KBC_MAX_COL, - kbc->keycode, input_dev); + kbc->keycode, kbc->idev); if (err) { dev_err(&pdev->dev, "failed to setup keymap\n"); return err; } - __set_bit(EV_REP, input_dev->evbit); - input_set_capability(input_dev, EV_MSC, MSC_SCAN); + __set_bit(EV_REP, kbc->idev->evbit); + input_set_capability(kbc->idev, EV_MSC, MSC_SCAN); - input_set_drvdata(input_dev, kbc); + input_set_drvdata(kbc->idev, kbc); err = devm_request_irq(&pdev->dev, kbc->irq, tegra_kbc_isr, IRQF_NO_SUSPEND | IRQF_TRIGGER_HIGH, pdev->name, kbc); @@ -680,7 +669,7 @@ static int tegra_kbc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, kbc); - device_init_wakeup(&pdev->dev, pdata->wakeup); + device_init_wakeup(&pdev->dev, kbc->wakeup); return 0; } diff --git a/include/linux/input/tegra_kbc.h b/include/linux/input/tegra_kbc.h deleted file mode 100644 index a13025612939..000000000000 --- a/include/linux/input/tegra_kbc.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Platform definitions for tegra-kbc keyboard input driver - * - * Copyright (c) 2010-2011, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef ASMARM_ARCH_TEGRA_KBC_H -#define ASMARM_ARCH_TEGRA_KBC_H - -#include -#include - -#define KBC_MAX_GPIO 24 -#define KBC_MAX_KPENT 8 - -#define KBC_MAX_ROW 16 -#define KBC_MAX_COL 8 -#define KBC_MAX_KEY (KBC_MAX_ROW * KBC_MAX_COL) - -enum tegra_pin_type { - PIN_CFG_IGNORE, - PIN_CFG_COL, - PIN_CFG_ROW, -}; - -struct tegra_kbc_pin_cfg { - enum tegra_pin_type type; - unsigned char num; -}; - -struct tegra_kbc_wake_key { - u8 row:4; - u8 col:4; -}; - -struct tegra_kbc_platform_data { - unsigned int debounce_cnt; - unsigned int repeat_cnt; - - struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO]; - const struct matrix_keymap_data *keymap_data; - - u32 wakeup_key; - bool wakeup; - bool use_fn_map; - bool use_ghost_filter; -}; -#endif -- cgit v1.2.3 From ac6324e7021dfa917ce4f9a836318c3e46fbb84e Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sun, 10 Feb 2013 18:32:18 +0100 Subject: bq2415x_charger: Add support for offline and 100mA mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Renamed mode BQ2415X_MODE_NONE to BQ2415X_MODE_OFF because this mode turning chaging completly off * Added new mode BQ2415X_MODE_NONE which enable charging with maximal current limit 100mA (this is minimal safe value for bq2415x chips) Signed-off-by: Pali Rohár Signed-off-by: Anton Vorontsov --- drivers/power/bq2415x_charger.c | 25 ++++++++++++++++++------- include/linux/power/bq2415x_charger.h | 3 ++- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c index ca70365e9410..ca91396fc48e 100644 --- a/drivers/power/bq2415x_charger.c +++ b/drivers/power/bq2415x_charger.c @@ -733,12 +733,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode) int charger = 0; int boost = 0; - if (mode == BQ2415X_MODE_HOST_CHARGER || - mode == BQ2415X_MODE_DEDICATED_CHARGER) - charger = 1; - if (mode == BQ2415X_MODE_BOOST) boost = 1; + else if (mode != BQ2415X_MODE_OFF) + charger = 1; if (!charger) ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE); @@ -750,6 +748,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode) return ret; switch (mode) { + case BQ2415X_MODE_OFF: + dev_dbg(bq->dev, "changing mode to: Offline\n"); + ret = bq2415x_set_current_limit(bq, 100); + break; case BQ2415X_MODE_NONE: dev_dbg(bq->dev, "changing mode to: N/A\n"); ret = bq2415x_set_current_limit(bq, 100); @@ -842,7 +844,7 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg) dev_err(bq->dev, "%s\n", msg); if (bq->automode > 0) bq->automode = 0; - bq2415x_set_mode(bq, BQ2415X_MODE_NONE); + bq2415x_set_mode(bq, BQ2415X_MODE_OFF); bq2415x_set_autotimer(bq, 0); } @@ -1135,6 +1137,10 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev, return -ENOSYS; bq->automode = 1; mode = bq->reported_mode; + } else if (strncmp(buf, "off", 3) == 0) { + if (bq->automode > 0) + bq->automode = 0; + mode = BQ2415X_MODE_OFF; } else if (strncmp(buf, "none", 4) == 0) { if (bq->automode > 0) bq->automode = 0; @@ -1182,6 +1188,9 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev, ret += sprintf(buf+ret, "auto ("); switch (bq->mode) { + case BQ2415X_MODE_OFF: + ret += sprintf(buf+ret, "off"); + break; case BQ2415X_MODE_NONE: ret += sprintf(buf+ret, "none"); break; @@ -1216,6 +1225,8 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev, return -EINVAL; switch (bq->reported_mode) { + case BQ2415X_MODE_OFF: + return sprintf(buf, "off\n"); case BQ2415X_MODE_NONE: return sprintf(buf, "none\n"); case BQ2415X_MODE_HOST_CHARGER: @@ -1535,8 +1546,8 @@ static int bq2415x_probe(struct i2c_client *client, bq->dev = &client->dev; bq->chip = id->driver_data; bq->name = name; - bq->mode = BQ2415X_MODE_NONE; - bq->reported_mode = BQ2415X_MODE_NONE; + bq->mode = BQ2415X_MODE_OFF; + bq->reported_mode = BQ2415X_MODE_OFF; bq->autotimer = 0; bq->automode = 0; diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 97a1665eaeaf..8dcc0f46fc0a 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -75,7 +75,8 @@ /* Supported modes with maximal current limit */ enum bq2415x_mode { - BQ2415X_MODE_NONE, /* unknown or no charger (100mA) */ + BQ2415X_MODE_OFF, /* offline mode (charger disabled) */ + BQ2415X_MODE_NONE, /* unknown charger (100mA) */ BQ2415X_MODE_HOST_CHARGER, /* usb host/hub charger (500mA) */ BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */ BQ2415X_MODE_BOOST, /* boost mode (charging disabled) */ -- cgit v1.2.3 From 558bd3e8dc7a798c5c845f90cf038b9bbd2df2b8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Feb 2013 21:51:27 -0500 Subject: PM idle: remove global declaration of pm_idle pm_idle appears in no generic Linux code, it appears only in architecture-specific code. Thus, pm_idle should not be declared in pm.h. Architectures that use an idle function pointer should delcare one local to their architecture, and/or use cpuidle. Signed-off-by: Len Brown Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Cc: linux-pm@vger.kernel.org --- include/linux/pm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 03d7bb145311..97bcf23e045a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -31,7 +31,6 @@ /* * Callbacks for platform drivers to implement. */ -extern void (*pm_idle)(void); extern void (*pm_power_off)(void); extern void (*pm_power_off_prepare)(void); -- cgit v1.2.3 From e7e319a9c51409c7effe34333ea26facf2fab9e1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 14 Feb 2013 12:16:43 -0600 Subject: libceph: improve packing in struct ceph_osd_req_op The layout of struct ceph_osd_req_op leaves lots of holes. Rearranging things a little for better field alignment reduces the size by a third. This resolves: http://tracker.ceph.com/issues/4163 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 69287ccfe68a..82bf6338d6c1 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -157,6 +157,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -165,23 +166,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -189,12 +191,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, -- cgit v1.2.3 From 87f979d390f9ecfa3d0038a9f9a002a62f8a1895 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "nofail" parameter There is only one caller of ceph_osdc_writepages(), and it always passes the value true as its "nofail" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with the constant value true. This and a number of cleanup patches that follow resolve: http://tracker.ceph.com/issues/4126 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 064d1a68d2c1..c7e401c96fc9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0, true); + &page, 1, 0, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 82bf6338d6c1..afcb255b016a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + int flags, int do_sync); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d9d58bbe9f9a..dd01b1340e95 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + int flags, int do_sync) { struct ceph_osd_request *req; int rc = 0; @@ -1880,7 +1880,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_WRITE, snapc, do_sync, truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + true, 1, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1889,7 +1889,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages %llu~%llu (%d pages)\n", off, len, req->r_num_pages); - rc = ceph_osdc_start_request(osdc, req, nofail); + rc = ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); -- cgit v1.2.3 From fbf8685fb155e12a9f4d4b966c7b3442ed557687 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "dosync" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "dosync" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c7e401c96fc9..bef552819312 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0); + &page, 1, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index afcb255b016a..7a63100a3e69 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync); + int flags); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index dd01b1340e95..ac186b7c9986 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int num_pages, - int flags, int do_sync) + int flags) { struct ceph_osd_request *req; int rc = 0; @@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_OP_WRITE, flags | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - snapc, do_sync, + snapc, 0, truncate_seq, truncate_size, mtime, true, 1, page_align); if (IS_ERR(req)) -- cgit v1.2.3 From 2480882611e3ab844563dd3d0a822227604ab8fe Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "flags" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "flags" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 3 +-- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index bef552819312..8d3240d6f289 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7a63100a3e69..6540e8861998 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -274,8 +274,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ac186b7c9986..d4e3812bcebc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1866,8 +1866,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; @@ -1876,8 +1875,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, snapc, 0, truncate_seq, truncate_size, mtime, true, 1, page_align); -- cgit v1.2.3 From a3bea47e8bdd51d921e5b2045720d60140612c7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_new_request() "num_reply" parameter The "num_reply" parameter to ceph_osdc_new_request() is never used inside that function, so get rid of it. Note that ceph_sync_write() passes 2 for that argument, while all other callers pass 1. It doesn't matter, but perhaps someone should verify this doesn't indicate a problem. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 4 ++-- fs/ceph/file.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8d3240d6f289..fc613715af46 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -837,7 +837,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a1e5b81e8118..9c4325e654ca 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -541,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6540e8861998..5812802bd8ae 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -234,8 +234,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d4e3812bcebc..d3e75138506b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -393,7 +393,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, + bool use_mempool, int page_align) { struct ceph_osd_req_op ops[2]; @@ -1837,7 +1837,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + false, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, snapc, 0, truncate_seq, truncate_size, mtime, - true, 1, page_align); + true, page_align); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.2.3 From 60e56f138180e72fa8487d4b9c1c916013494f46 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: libceph: kill ceph_calc_raw_layout() There is no caller of ceph_calc_raw_layout() outside of libceph, so there's no need to export from the module. Furthermore, there is only one caller, in calc_layout(), and it is not much more than a simple wrapper for that function. So get rid of ceph_calc_raw_layout() and embed it instead within calc_layout(). While touching "osd_client.c", get rid of the unnecessary forward declaration of __send_request(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 5 --- net/ceph/osd_client.c | 77 +++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5812802bd8ae..c39e7ed4b203 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,11 +207,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_op, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0d67cd37173b..cd3a489b7438 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -38,49 +38,6 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_file_layout *layout, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (objlen < orig_len) { - *plen = objlen; - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - } - - if (op_has_extent(op->op)) { - u32 osize = le32_to_cpu(layout->fl_object_size); - op->extent.offset = objoff; - op->extent.length = objlen; - if (op->extent.truncate_size <= off - objoff) { - op->extent.truncate_size = 0; - } else { - op->extent.truncate_size -= off - objoff; - if (op->extent.truncate_size > osize) - op->extent.truncate_size = osize; - } - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -112,12 +69,42 @@ static int calc_layout(struct ceph_vino vino, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - u64 bno; + u64 orig_len = *plen; + u64 bno = 0; + u64 objoff = 0; + u64 objlen = 0; int r; - r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op); + /* object extent? */ + r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, + &objoff, &objlen); if (r < 0) return r; + if (objlen < orig_len) { + *plen = objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } + + if (op_has_extent(op->op)) { + u32 osize = le32_to_cpu(layout->fl_object_size); + op->extent.offset = objoff; + op->extent.length = objlen; + if (op->extent.truncate_size <= off - objoff) { + op->extent.truncate_size = 0; + } else { + op->extent.truncate_size -= off - objoff; + if (op->extent.truncate_size > osize) + op->extent.truncate_size = osize; + } + } + req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; + + dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", + bno, objoff, objlen, req->r_num_pages); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); -- cgit v1.2.3 From 3c663bbdcdf9296e0fe3362acb9e81f49d7b72c6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_create_event() "one_shot" parameter There is only one caller of ceph_osdc_create_event(), and it provides 0 as its "one_shot" argument. Get rid of that argument and just use 0 in its place. Replace the code in handle_watch_notify() that executes if one_shot is nonzero in the event with a BUG_ON() call. While modifying "osd_client.c", give handle_watch_notify() static scope. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 11 +++++------ 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 982963ec2607..13ee789f2328 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1744,7 +1744,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) rbd_assert(start ^ !!rbd_dev->watch_request); if (start) { - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, rbd_dev, + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, &rbd_dev->watch_event); if (ret < 0) return ret; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c39e7ed4b203..39c55d61e159 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -273,8 +273,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); extern int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cd3a489b7438..4322faa7c811 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1477,8 +1477,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1488,7 +1487,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); @@ -1541,7 +1540,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1562,9 +1562,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", -- cgit v1.2.3 From 2d2f522699fe8b827087941eb31b9a12cf465f17 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_wait_event() There are no actual users of ceph_osdc_wait_event(). This would have been one-shot events, but we no longer support those so just get rid of this function. Since this leaves nothing else that waits for the completion of an event, we can get rid of the completion in a struct ceph_osd_event. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 --- net/ceph/osd_client.c | 18 ------------------ 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 39c55d61e159..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -107,7 +107,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -275,8 +274,6 @@ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4322faa7c811..ad6b8b35f5ca 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1494,7 +1494,6 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - init_completion(&event->completion); spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1530,7 +1529,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1588,7 +1586,6 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1597,21 +1594,6 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) -{ - int err; - - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; -} -EXPORT_SYMBOL(ceph_osdc_wait_event); - /* * Register request, send initial attempt. */ -- cgit v1.2.3 From 0315a7770983bbe69211efed1aaee08324acd54c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update rados.h Update most of "include/linux/ceph/rados.h" to match its user space counterpart in "src/include/rados.h" in the ceph tree. Almost everything that has changed is either: - added or revised comments - added definitions (therefore no real effect on existing code) - defining the same value a different way (e.g., "1 << 0" vs "1") The only exceptions are: - The declaration of ceph_osd_state_name() was excluded; that will be inserted in the next patch. - ceph_osd_op_mode_read() and ceph_osd_op_mode_modify() are defined differently, but they were never used in the kernel - CEPH_OSD_FLAG_PEERSTAT is now CEPH_OSD_FLAG_PEERSTAT_OLD, but that was never used in the kernel Anything that was present in this file but not in its user space counterpart was left intact here. I left the definitions of EOLDSNAPC and EBLACKLISTED using numerical values here; I'm not sure the right way to go with those. This and the next two commits resolve: http://tracker.ceph.com/issues/4164 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 91 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..9c3b4aaf516b 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -145,8 +145,10 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +163,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +195,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +236,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +274,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +285,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +313,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +324,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +338,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. */ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,7 +429,11 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); @@ -424,5 +476,4 @@ struct ceph_osd_reply_head { } __attribute__ ((packed)); - #endif -- cgit v1.2.3 From 4b568b1aaf23d0ce64b98d01d5ad1bcc7694440a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: add ceph_osd_state_name() Add the definition of ceph_osd_state_name(), to match its counterpart in user space. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 2 ++ net/ceph/ceph_strings.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 9c3b4aaf516b..b65182aba6f7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -150,6 +150,8 @@ struct ceph_eversion { #define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ +extern const char *ceph_osd_state_name(int s); + /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29c..833075cff4e8 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -68,6 +68,21 @@ const char *ceph_osd_op_name(int op) return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { -- cgit v1.2.3 From dd6f5e105d85e02bc41db0891eb07152b1746ad9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update ceph_fs.h Update most of "include/linux/ceph/ceph_fs.h" to match its user space counterpart in "src/include/ceph_fs.h" in the ceph tree. Everything that has changed is either: - added definitions (therefore no real effect on existing code) - deleting unused symbols - added or revised comments There were some differences between the struct definitions for ceph_mon_subscribe_item and the open field of ceph_mds_request_args; those differences remain. This and the next commit resolve: http://tracker.ceph.com/issues/4165 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_fs.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack { struct ceph_fsid fsid; } __attribute__ ((packed)); +/* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + /* * mds states * > 0 -> in @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. + */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) -- cgit v1.2.3 From b4278c961aca320839964e23cfc7906ff61af0c2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 18 Feb 2013 01:34:55 +0000 Subject: net: proc: remove proc_net_fops_create proc_net_fops_create has been replaced by proc_create, we can remove it now. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 8 -------- include/linux/proc_fs.h | 3 --- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index fe72cd073dea..30f7c678424b 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -177,14 +177,6 @@ const struct file_operations proc_net_operations = { .readdir = proc_tgid_net_readdir, }; - -struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, umode_t mode, const struct file_operations *fops) -{ - return proc_create(name, mode, net->proc_net, fops); -} -EXPORT_SYMBOL_GPL(proc_net_fops_create); - void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 32676b35d2f5..35ee1891ae25 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -171,8 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -extern struct proc_dir_entry *proc_net_fops_create(struct net *net, - const char *name, umode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); @@ -184,7 +182,6 @@ extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else -#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} static inline void proc_flush_task(struct task_struct *task) -- cgit v1.2.3 From c2399059a389ba686fa7f45d8913a708914752c4 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 18 Feb 2013 01:34:57 +0000 Subject: net: proc: remove proc_net_remove proc_net_remove has been replaced by remove_proc_entry. we can remove it now. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 6 ------ include/linux/proc_fs.h | 3 --- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 30f7c678424b..3131a03d7d37 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -177,12 +177,6 @@ const struct file_operations proc_net_operations = { .readdir = proc_tgid_net_readdir, }; -void proc_net_remove(struct net *net, const char *name) -{ - remove_proc_entry(name, net->proc_net); -} -EXPORT_SYMBOL_GPL(proc_net_remove); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 35ee1891ae25..319f69422667 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -171,7 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); @@ -182,8 +181,6 @@ extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else -static inline void proc_net_remove(struct net *net, const char *name) {} - static inline void proc_flush_task(struct task_struct *task) { } -- cgit v1.2.3 From 900ff8c6321418dafa03c22e215cb9646a2541b9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 18 Feb 2013 19:20:33 +0000 Subject: net: move procfs code to net/core/net-procfs.c Similar to net/core/net-sysfs.c, group procfs code to a single unit. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 36 +++- net/core/Makefile | 1 + net/core/dev.c | 384 +----------------------------------------- net/core/dev_addr_lists.c | 74 --------- net/core/net-procfs.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 450 insertions(+), 459 deletions(-) create mode 100644 net/core/net-procfs.c (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 920361bc27e7..f111b4f038f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2692,9 +2692,9 @@ extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); #ifdef CONFIG_PROC_FS -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); +extern int __init dev_proc_init(void); +#else +#define dev_proc_init() 0 #endif extern int netdev_class_create_file(struct class_attribute *class_attr); @@ -2896,4 +2896,34 @@ do { \ }) #endif +/* + * The list of packet types we will receive (as opposed to discard) + * and the routines to invoke. + * + * Why 16. Because with 16 the only overlap we get on a hash of the + * low nibble of the protocol value is RARP/SNAP/X.25. + * + * NOTE: That is no longer true with the addition of VLAN tags. Not + * sure which should go first, but I bet it won't make much + * difference if we are running VLANs. The good news is that + * this protocol won't be in the list unless compiled in, so + * the average user (w/out VLANs) will not be adversely affected. + * --BLG + * + * 0800 IP + * 8100 802.1Q VLAN + * 0001 802.3 + * 0002 AX.25 + * 0004 802.2 + * 8035 RARP + * 0005 SNAP + * 0805 X.25 + * 0806 ARP + * 8137 IPX + * 0009 Localtalk + * 86DD IPv6 + */ +#define PTYPE_HASH_SIZE (16) +#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 0c5e3618c80b..b33b996f5dd6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -13,6 +13,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o +obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index decf55f9ad80..8d9ddb09f208 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -97,8 +97,6 @@ #include #include #include -#include -#include #include #include #include @@ -110,7 +108,6 @@ #include #include #include -#include #include #include #include @@ -141,41 +138,10 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -/* - * The list of packet types we will receive (as opposed to discard) - * and the routines to invoke. - * - * Why 16. Because with 16 the only overlap we get on a hash of the - * low nibble of the protocol value is RARP/SNAP/X.25. - * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * - * 0800 IP - * 8100 802.1Q VLAN - * 0001 802.3 - * 0002 AX.25 - * 0004 802.2 - * 8035 RARP - * 0005 SNAP - * 0805 X.25 - * 0806 ARP - * 8137 IPX - * 0009 Localtalk - * 86DD IPv6 - */ - -#define PTYPE_HASH_SIZE (16) -#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) - static DEFINE_SPINLOCK(ptype_lock); static DEFINE_SPINLOCK(offload_lock); -static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -static struct list_head ptype_all __read_mostly; /* Taps */ +struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; /* @@ -4217,352 +4183,6 @@ softnet_break: goto out; } -#ifdef CONFIG_PROC_FS - -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - struct net_device *dev; - struct hlist_node *p; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); - - h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (++count == offset) - return dev; - } - - return NULL; -} - -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - - return NULL; -} - -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. - */ -void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - if (!*pos) - return SEQ_START_TOKEN; - - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); -} - -void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return dev_from_bucket(seq, pos); -} - -void dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) -{ - struct rtnl_link_stats64 temp; - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", - dev->name, stats->rx_bytes, stats->rx_packets, - stats->rx_errors, - stats->rx_dropped + stats->rx_missed_errors, - stats->rx_fifo_errors, - stats->rx_length_errors + stats->rx_over_errors + - stats->rx_crc_errors + stats->rx_frame_errors, - stats->rx_compressed, stats->multicast, - stats->tx_bytes, stats->tx_packets, - stats->tx_errors, stats->tx_dropped, - stats->tx_fifo_errors, stats->collisions, - stats->tx_carrier_errors + - stats->tx_aborted_errors + - stats->tx_window_errors + - stats->tx_heartbeat_errors, - stats->tx_compressed); -} - -/* - * Called from the PROCfs module. This now uses the new arbitrary sized - * /proc/net interface to create /proc/net/dev - */ -static int dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); - else - dev_seq_printf_stats(seq, v); - return 0; -} - -static struct softnet_data *softnet_get_online(loff_t *pos) -{ - struct softnet_data *sd = NULL; - - while (*pos < nr_cpu_ids) - if (cpu_online(*pos)) { - sd = &per_cpu(softnet_data, *pos); - break; - } else - ++*pos; - return sd; -} - -static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) -{ - return softnet_get_online(pos); -} - -static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return softnet_get_online(pos); -} - -static void softnet_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int softnet_seq_show(struct seq_file *seq, void *v) -{ - struct softnet_data *sd = v; - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - sd->processed, sd->dropped, sd->time_squeeze, 0, - 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); - return 0; -} - -static const struct seq_operations dev_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_seq_show, -}; - -static int dev_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_seq_fops = { - .owner = THIS_MODULE, - .open = dev_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static const struct seq_operations softnet_seq_ops = { - .start = softnet_seq_start, - .next = softnet_seq_next, - .stop = softnet_seq_stop, - .show = softnet_seq_show, -}; - -static int softnet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &softnet_seq_ops); -} - -static const struct file_operations softnet_seq_fops = { - .owner = THIS_MODULE, - .open = softnet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static void *ptype_get_idx(loff_t pos) -{ - struct packet_type *pt = NULL; - loff_t i = 0; - int t; - - list_for_each_entry_rcu(pt, &ptype_all, list) { - if (i == pos) - return pt; - ++i; - } - - for (t = 0; t < PTYPE_HASH_SIZE; t++) { - list_for_each_entry_rcu(pt, &ptype_base[t], list) { - if (i == pos) - return pt; - ++i; - } - } - return NULL; -} - -static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct packet_type *pt; - struct list_head *nxt; - int hash; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); - - pt = v; - nxt = pt->list.next; - if (pt->type == htons(ETH_P_ALL)) { - if (nxt != &ptype_all) - goto found; - hash = 0; - nxt = ptype_base[0].next; - } else - hash = ntohs(pt->type) & PTYPE_HASH_MASK; - - while (nxt == &ptype_base[hash]) { - if (++hash >= PTYPE_HASH_SIZE) - return NULL; - nxt = ptype_base[hash].next; - } -found: - return list_entry(nxt, struct packet_type, list); -} - -static void ptype_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int ptype_seq_show(struct seq_file *seq, void *v) -{ - struct packet_type *pt = v; - - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { - if (pt->type == htons(ETH_P_ALL)) - seq_puts(seq, "ALL "); - else - seq_printf(seq, "%04x", ntohs(pt->type)); - - seq_printf(seq, " %-8s %pF\n", - pt->dev ? pt->dev->name : "", pt->func); - } - - return 0; -} - -static const struct seq_operations ptype_seq_ops = { - .start = ptype_seq_start, - .next = ptype_seq_next, - .stop = ptype_seq_stop, - .show = ptype_seq_show, -}; - -static int ptype_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ptype_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ptype_seq_fops = { - .owner = THIS_MODULE, - .open = ptype_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - - -static int __net_init dev_proc_net_init(struct net *net) -{ - int rc = -ENOMEM; - - if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) - goto out; - if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, - &softnet_seq_fops)) - goto out_dev; - if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) - goto out_softnet; - - if (wext_proc_init(net)) - goto out_ptype; - rc = 0; -out: - return rc; -out_ptype: - remove_proc_entry("ptype", net->proc_net); -out_softnet: - remove_proc_entry("softnet_stat", net->proc_net); -out_dev: - remove_proc_entry("dev", net->proc_net); - goto out; -} - -static void __net_exit dev_proc_net_exit(struct net *net) -{ - wext_proc_exit(net); - - remove_proc_entry("ptype", net->proc_net); - remove_proc_entry("softnet_stat", net->proc_net); - remove_proc_entry("dev", net->proc_net); -} - -static struct pernet_operations __net_initdata dev_proc_ops = { - .init = dev_proc_net_init, - .exit = dev_proc_net_exit, -}; - -static int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} -#else -#define dev_proc_init() 0 -#endif /* CONFIG_PROC_FS */ - - struct netdev_upper { struct net_device *dev; bool master; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 89562529df45..bd2eb9d3e369 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -15,7 +15,6 @@ #include #include #include -#include /* * General list handling functions @@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev) __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init); - -#ifdef CONFIG_PROC_FS -#include - -static int dev_mc_seq_show(struct seq_file *seq, void *v) -{ - struct netdev_hw_addr *ha; - struct net_device *dev = v; - - if (v == SEQ_START_TOKEN) - return 0; - - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(ha, dev) { - int i; - - seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, - dev->name, ha->refcount, ha->global_use); - - for (i = 0; i < dev->addr_len; i++) - seq_printf(seq, "%02x", ha->addr[i]); - - seq_putc(seq, '\n'); - } - netif_addr_unlock_bh(dev); - return 0; -} - -static const struct seq_operations dev_mc_seq_ops = { - .start = dev_seq_start, - .next = dev_seq_next, - .stop = dev_seq_stop, - .show = dev_mc_seq_show, -}; - -static int dev_mc_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations dev_mc_seq_fops = { - .owner = THIS_MODULE, - .open = dev_mc_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -#endif - -static int __net_init dev_mc_net_init(struct net *net) -{ - if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) - return -ENOMEM; - return 0; -} - -static void __net_exit dev_mc_net_exit(struct net *net) -{ - remove_proc_entry("dev_mcast", net->proc_net); -} - -static struct pernet_operations __net_initdata dev_mc_net_ops = { - .init = dev_mc_net_init, - .exit = dev_mc_net_exit, -}; - -void __init dev_mcast_init(void) -{ - register_pernet_subsys(&dev_mc_net_ops); -} - diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c new file mode 100644 index 000000000000..ac87066491e9 --- /dev/null +++ b/net/core/net-procfs.c @@ -0,0 +1,414 @@ +#include +#include +#include +#include + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count = 0, offset = get_offset(*pos); + + h = &net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (++count == offset) + return dev; + } + + return NULL; +} + +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) +{ + struct net_device *dev; + unsigned int bucket; + + do { + dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + +/* + * This is invoked by the /proc filesystem handler to display a device + * in detail. + */ +static void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + + return dev_from_bucket(seq, pos); +} + +static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return dev_from_bucket(seq, pos); +} + +static void dev_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) +{ + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, + stats->rx_errors, + stats->rx_dropped + stats->rx_missed_errors, + stats->rx_fifo_errors, + stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_frame_errors, + stats->rx_compressed, stats->multicast, + stats->tx_bytes, stats->tx_packets, + stats->tx_errors, stats->tx_dropped, + stats->tx_fifo_errors, stats->collisions, + stats->tx_carrier_errors + + stats->tx_aborted_errors + + stats->tx_window_errors + + stats->tx_heartbeat_errors, + stats->tx_compressed); +} + +/* + * Called from the PROCfs module. This now uses the new arbitrary sized + * /proc/net interface to create /proc/net/dev + */ +static int dev_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Inter-| Receive " + " | Transmit\n" + " face |bytes packets errs drop fifo frame " + "compressed multicast|bytes packets errs " + "drop fifo colls carrier compressed\n"); + else + dev_seq_printf_stats(seq, v); + return 0; +} + +static struct softnet_data *softnet_get_online(loff_t *pos) +{ + struct softnet_data *sd = NULL; + + while (*pos < nr_cpu_ids) + if (cpu_online(*pos)) { + sd = &per_cpu(softnet_data, *pos); + break; + } else + ++*pos; + return sd; +} + +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) +{ + return softnet_get_online(pos); +} + +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return softnet_get_online(pos); +} + +static void softnet_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int softnet_seq_show(struct seq_file *seq, void *v) +{ + struct softnet_data *sd = v; + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + sd->processed, sd->dropped, sd->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + sd->cpu_collision, sd->received_rps); + return 0; +} + +static const struct seq_operations dev_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_seq_show, +}; + +static int dev_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_seq_fops = { + .owner = THIS_MODULE, + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static const struct seq_operations softnet_seq_ops = { + .start = softnet_seq_start, + .next = softnet_seq_next, + .stop = softnet_seq_stop, + .show = softnet_seq_show, +}; + +static int softnet_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &softnet_seq_ops); +} + +static const struct file_operations softnet_seq_fops = { + .owner = THIS_MODULE, + .open = softnet_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < PTYPE_HASH_SIZE; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & PTYPE_HASH_MASK; + + while (nxt == &ptype_base[hash]) { + if (++hash >= PTYPE_HASH_SIZE) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s %pF\n", + pt->dev ? pt->dev->name : "", pt->func); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ptype_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + + +static int __net_init dev_proc_net_init(struct net *net) +{ + int rc = -ENOMEM; + + if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) + goto out; + if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, + &softnet_seq_fops)) + goto out_dev; + if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) + goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; + rc = 0; +out: + return rc; +out_ptype: + remove_proc_entry("ptype", net->proc_net); +out_softnet: + remove_proc_entry("softnet_stat", net->proc_net); +out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +} + +static void __net_exit dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + remove_proc_entry("ptype", net->proc_net); + remove_proc_entry("softnet_stat", net->proc_net); + remove_proc_entry("dev", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} + +static int dev_mc_seq_show(struct seq_file *seq, void *v) +{ + struct netdev_hw_addr *ha; + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + return 0; + + netif_addr_lock_bh(dev); + netdev_for_each_mc_addr(ha, dev) { + int i; + + seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, ha->refcount, ha->global_use); + + for (i = 0; i < dev->addr_len; i++) + seq_printf(seq, "%02x", ha->addr[i]); + + seq_putc(seq, '\n'); + } + netif_addr_unlock_bh(dev); + return 0; +} + +static const struct seq_operations dev_mc_seq_ops = { + .start = dev_seq_start, + .next = dev_seq_next, + .stop = dev_seq_stop, + .show = dev_mc_seq_show, +}; + +static int dev_mc_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations dev_mc_seq_fops = { + .owner = THIS_MODULE, + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init dev_mc_net_init(struct net *net) +{ + if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void __net_exit dev_mc_net_exit(struct net *net) +{ + remove_proc_entry("dev_mcast", net->proc_net); +} + +static struct pernet_operations __net_initdata dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + +void __init dev_mcast_init(void) +{ + register_pernet_subsys(&dev_mc_net_ops); +} -- cgit v1.2.3 From 77852fea6e2442a0e654a9292060489895de18c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 16 Feb 2013 09:46:48 +0100 Subject: sched/rt: Add header to IA64 relied on it through sched.h inclusion: arch/ia64/kernel/init_task.c:38:11: error: 'MAX_PRIO' undeclared here (not in a function) arch/ia64/kernel/init_task.c:38:11: error: 'RR_TIMESLICE' undeclared here (not in a function) Reported-by: kbuild test robot Cc: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-xaan1twswggedMR0airtpjui@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index cc898b871cef..5cd0f0949927 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ -- cgit v1.2.3 From 5cd3f5affad2109fd1458aab3f6216f2181e26ea Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 24 Jan 2013 21:53:17 +0100 Subject: lockdep: Silence warning if CONFIG_LOCKDEP isn't set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit c9a4962881929df7f1ef6e63e1b9da304faca4dd ("nfsd: make client_lock per net") compiling nfs4state.o without CONFIG_LOCKDEP set, triggers this GCC warning: fs/nfsd/nfs4state.c: In function ‘free_client’: fs/nfsd/nfs4state.c:1051:19: warning: unused variable ‘nn’ [-Wunused-variable] The cause of that warning is that lockdep_assert_held() compiles away if CONFIG_LOCKDEP is not set. Silence this warning by using the argument to lockdep_assert_held() as a nop if CONFIG_LOCKDEP is not set. Signed-off-by: Paul Bolle Cc: Peter Zijlstra Cc: Stanislav Kinsbursky Cc: J. Bruce Fields Link: http://lkml.kernel.org/r/1359060797.1325.33.camel@x61.thuisdomein Signed-off-by: Ingo Molnar -- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2bca44b0893c..f05631effc73 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -410,7 +410,7 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) -#define lockdep_assert_held(l) do { } while (0) +#define lockdep_assert_held(l) do { (void)(l); } while (0) #define lockdep_recursing(tsk) (0) -- cgit v1.2.3 From eece09ec213e93333010bf4c6bb9175b32229c54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:25:03 +0200 Subject: locking: Various static lock initializer fixes The static lock initializers want to be fed the proper name of the lock and not some random string. In mainline random strings are obfuscating the readability of debug output, but for RT they prevent the spinlock substitution. Fix it up. Signed-off-by: Thomas Gleixner --- drivers/char/random.c | 6 +++--- drivers/usb/chipidea/debug.c | 2 +- fs/file.c | 2 +- include/linux/idr.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 85e81ec1451e..594bda9dcfc8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -445,7 +445,7 @@ static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = { .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data }; @@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = { .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock), + .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), .pool = nonblocking_pool_data }; diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 3bc244d2636a..a62c4a47d52c 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -222,7 +222,7 @@ static struct { } dbg_data = { .idx = 0, .tty = 0, - .lck = __RW_LOCK_UNLOCKED(lck) + .lck = __RW_LOCK_UNLOCKED(dbg_data.lck) }; /** diff --git a/fs/file.c b/fs/file.c index 2b3570b7caeb..3906d9577a18 100644 --- a/fs/file.c +++ b/fs/file.c @@ -516,7 +516,7 @@ struct files_struct init_files = { .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, }, - .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), + .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), }; /* diff --git a/include/linux/idr.h b/include/linux/idr.h index de7e190f1af4..e5eb125effe6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -136,7 +136,7 @@ struct ida { struct ida_bitmap *free_bitmap; }; -#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } +#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_pre_get(struct ida *ida, gfp_t gfp_mask); -- cgit v1.2.3 From 65c9d1bbc9f32c568a4f7ad154c9a10b0028df52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Jul 2011 18:38:22 +0200 Subject: seqlock: Remove unused functions Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 600060e25ec6..cb0599ca049c 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -69,17 +69,6 @@ static inline void write_sequnlock(seqlock_t *sl) spin_unlock(&sl->lock); } -static inline int write_tryseqlock(seqlock_t *sl) -{ - int ret = spin_trylock(&sl->lock); - - if (ret) { - ++sl->sequence; - smp_wmb(); - } - return ret; -} - /* Start of read calculation -- fetch last complete writer token */ static __always_inline unsigned read_seqbegin(const seqlock_t *sl) { @@ -269,14 +258,4 @@ static inline void write_seqcount_barrier(seqcount_t *s) #define write_sequnlock_bh(lock) \ do { write_sequnlock(lock); local_bh_enable(); } while(0) -#define read_seqbegin_irqsave(lock, flags) \ - ({ local_irq_save(flags); read_seqbegin(lock); }) - -#define read_seqretry_irqrestore(lock, iv, flags) \ - ({ \ - int ret = read_seqretry(lock, iv); \ - local_irq_restore(flags); \ - ret; \ - }) - #endif /* __LINUX_SEQLOCK_H */ -- cgit v1.2.3 From 6617feca15c68aad12f4a1edd8d8e2ef8d5b4ae5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Jul 2011 18:40:26 +0200 Subject: seqlock: Use seqcount infrastructure No point in having different implementations for the same thing. Change the macro mess to inline functions where possible. Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 174 +++++++++++++++++++++++++----------------------- 1 file changed, 92 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index cb0599ca049c..18299057402f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -30,81 +30,12 @@ #include #include -typedef struct { - unsigned sequence; - spinlock_t lock; -} seqlock_t; - -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ -#define __SEQLOCK_UNLOCKED(lockname) \ - { 0, __SPIN_LOCK_UNLOCKED(lockname) } - -#define seqlock_init(x) \ - do { \ - (x)->sequence = 0; \ - spin_lock_init(&(x)->lock); \ - } while (0) - -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) - -/* Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. - */ -static inline void write_seqlock(seqlock_t *sl) -{ - spin_lock(&sl->lock); - ++sl->sequence; - smp_wmb(); -} - -static inline void write_sequnlock(seqlock_t *sl) -{ - smp_wmb(); - sl->sequence++; - spin_unlock(&sl->lock); -} - -/* Start of read calculation -- fetch last complete writer token */ -static __always_inline unsigned read_seqbegin(const seqlock_t *sl) -{ - unsigned ret; - -repeat: - ret = ACCESS_ONCE(sl->sequence); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - - return ret; -} - -/* - * Test if reader processed invalid data. - * - * If sequence value changed then writer changed data while in section. - */ -static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) -{ - smp_rmb(); - - return unlikely(sl->sequence != start); -} - - /* * Version using sequence counter only. * This can be used when code has its own mutex protecting the * updating starting before the write_seqcountbeqin() and ending * after the write_seqcount_end(). */ - typedef struct seqcount { unsigned sequence; } seqcount_t; @@ -207,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_retry(s, start); } @@ -241,21 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s) s->sequence+=2; } +typedef struct { + struct seqcount seqcount; + spinlock_t lock; +} seqlock_t; + +/* + * These macros triggered gcc-3.x compile-time problems. We think these are + * OK now. Be cautious. + */ +#define __SEQLOCK_UNLOCKED(lockname) \ + { \ + .seqcount = SEQCNT_ZERO, \ + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ + } + +#define seqlock_init(x) \ + do { \ + seqcount_init(&(x)->seqcount); \ + spin_lock_init(&(x)->lock); \ + } while (0) + +#define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) + +/* + * Read side functions for starting and finalizing a read side section. + */ +static inline unsigned read_seqbegin(const seqlock_t *sl) +{ + return read_seqcount_begin(&sl->seqcount); +} + +static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) +{ + return read_seqcount_retry(&sl->seqcount, start); +} + /* - * Possible sw/hw IRQ protected versions of the interfaces. + * Lock out other writers and update the count. + * Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. */ +static inline void write_seqlock(seqlock_t *sl) +{ + spin_lock(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock(&sl->lock); +} + +static inline void write_seqlock_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_bh(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_bh(&sl->lock); +} + +static inline void write_seqlock_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_irq(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + write_seqcount_begin(&sl->seqcount); + return flags; +} + #define write_seqlock_irqsave(lock, flags) \ - do { local_irq_save(flags); write_seqlock(lock); } while (0) -#define write_seqlock_irq(lock) \ - do { local_irq_disable(); write_seqlock(lock); } while (0) -#define write_seqlock_bh(lock) \ - do { local_bh_disable(); write_seqlock(lock); } while (0) + do { flags = __write_seqlock_irqsave(lock); } while (0) -#define write_sequnlock_irqrestore(lock, flags) \ - do { write_sequnlock(lock); local_irq_restore(flags); } while(0) -#define write_sequnlock_irq(lock) \ - do { write_sequnlock(lock); local_irq_enable(); } while(0) -#define write_sequnlock_bh(lock) \ - do { write_sequnlock(lock); local_bh_enable(); } while(0) +static inline void +write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irqrestore(&sl->lock, flags); +} #endif /* __LINUX_SEQLOCK_H */ -- cgit v1.2.3 From 4fc1a601f147abe3bfb4d70fe718110ed21953e1 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Tue, 19 Feb 2013 00:43:10 +0000 Subject: net: proc: fix build failed when procfs is not configured commit d4beaa66add8aebf83ab16d2fde4e4de8dac36df "net: proc: change proc_net_fops_create to proc_create" uses proc_create to replace proc_net_fops_create, when CONFIG_PROC isn't configured, some build error will occurs. net/packet/af_packet.c: In function 'packet_net_init': net/packet/af_packet.c:3831:48: error: 'packet_seq_fops' undeclared (first use in this function) net/packet/af_packet.c:3831:48: note: each undeclared identifier is reported only once for each function it appears in There may be other build fails like above,this patch change proc_create from function to macros when CONFIG_PROC is not configured,just like what proc_net_fops_create did before this commit. Reported-by: Fengguang Wu Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/linux/proc_fs.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 319f69422667..d0a1f2ca1c3f 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -187,12 +187,9 @@ static inline void proc_flush_task(struct task_struct *task) static inline struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, struct proc_dir_entry *parent) { return NULL; } -static inline struct proc_dir_entry *proc_create(const char *name, - umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) -{ - return NULL; -} + +#define proc_create(name, mode, parent, fops) ({ (void)(mode), NULL; }) + static inline struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, void *data) -- cgit v1.2.3 From cd0615746ba0f6643fb984345ae6ee0b73404ca6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 Feb 2013 02:47:05 +0000 Subject: net: fix a build failure when !CONFIG_PROC_FS When !CONFIG_PROC_FS dev_mcast_init() is not defined, actually we can just merge dev_mcast_init() into dev_proc_init(). Reported-by: Gao feng Cc: Gao feng Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/core/dev.c | 1 - net/core/net-procfs.c | 12 +++++------- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f111b4f038f3..b3d00fa4b314 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2646,7 +2646,6 @@ extern void netdev_notify_peers(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); -extern void dev_mcast_init(void); extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, diff --git a/net/core/dev.c b/net/core/dev.c index 8d9ddb09f208..17bc535115d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6260,7 +6260,6 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); - dev_mcast_init(); rc = 0; out: return rc; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index ac87066491e9..0f6bb6f8d391 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -341,11 +341,6 @@ static struct pernet_operations __net_initdata dev_proc_ops = { .exit = dev_proc_net_exit, }; -int __init dev_proc_init(void) -{ - return register_pernet_subsys(&dev_proc_ops); -} - static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; @@ -408,7 +403,10 @@ static struct pernet_operations __net_initdata dev_mc_net_ops = { .exit = dev_mc_net_exit, }; -void __init dev_mcast_init(void) +int __init dev_proc_init(void) { - register_pernet_subsys(&dev_mc_net_ops); + int ret = register_pernet_subsys(&dev_proc_ops); + if (!ret) + return register_pernet_subsys(&dev_mc_net_ops); + return ret; } -- cgit v1.2.3 From f84adf4921ae3115502f44ff467b04bf2f88cf04 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 13 Feb 2013 13:01:55 -0500 Subject: xen-blkfront: drop the use of llist_for_each_entry_safe Replace llist_for_each_entry_safe with a while loop. llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best to remove it and use a while loop and do the deletion manually. Specifically this bug can be triggered by hot-unplugging a disk, either by doing xm block-detach or by save/restore cycle. BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] blkif_free+0x63/0x130 [xen_blkfront] The crash call trace is: ... bad_area_nosemaphore+0x13/0x20 do_page_fault+0x25e/0x4b0 page_fault+0x25/0x30 ? blkif_free+0x63/0x130 [xen_blkfront] blkfront_resume+0x46/0xa0 [xen_blkfront] xenbus_dev_resume+0x6c/0x140 pm_op+0x192/0x1b0 device_resume+0x82/0x1e0 dpm_resume+0xc9/0x1a0 dpm_resume_end+0x15/0x30 do_suspend+0x117/0x1e0 When drilling down to the assembler code, on newer GCC it does .L29: cmpq $-16, %r12 #, persistent_gnt check je .L30 #, out of the loop .L25: ... code in the loop testq %r13, %r13 # n je .L29 #, back to the top of the loop cmpq $-16, %r12 #, persistent_gnt check movq 16(%r12), %r13 # .node.next, n jne .L25 #, back to the top of the loop .L30: While on GCC 4.1, it is: L78: ... code in the loop testq %r13, %r13 # n je .L78 #, back to the top of the loop movq 16(%rbx), %r13 # .node.next, n jmp .L78 #, back to the top of the loop Which basically means that the exit loop condition instead of being: &(pos)->member != NULL; is: ; which makes the loop unbound. Since xen-blkfront is the only user of the llist_for_each_entry_safe macro remove it from llist.h. Orabug: 16263164 CC: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 13 ++++++++++--- include/linux/llist.h | 25 ------------------------- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c18ac5a..c3dae2e0f290 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/include/linux/llist.h b/include/linux/llist.h index d0ab98f73d38..a5199f6d0e82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -124,31 +124,6 @@ static inline void init_llist_head(struct llist_head *list) &(pos)->member != NULL; \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) -/** - * llist_for_each_entry_safe - iterate safely against remove over some entries - * of lock-less list of given type. - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as a temporary storage. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. This variant allows removal of entries - * as we iterate. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each_entry_safe(pos, n, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member), \ - (n) = (pos)->member.next; \ - &(pos)->member != NULL; \ - (pos) = llist_entry(n, typeof(*(pos)), member), \ - (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) - /** * llist_empty - tests whether a lock-less list is empty * @head: the list to test -- cgit v1.2.3 From b324814e8436772cb3367b14149ba003a9954525 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: use void pointers in page vector functions The functions used for working with ceph page vectors are defined with char pointers, but they're really intended to operate on untyped data. Change the types of these function parameters to (void *) to reflect this. (Note that the functions now assume void pointer arithmetic works like arithmetic on char pointers.) Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 10 +++++----- net/ceph/pagevec.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c44275ab375c..2250f8bb2490 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -222,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -230,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len); extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, + const void *data, loff_t off, size_t len); extern int ceph_copy_from_page_vector(struct page **pages, - char *data, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d1..5b20be979c19 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -119,7 +119,7 @@ int ceph_copy_user_to_page_vector(struct page **pages, EXPORT_SYMBOL(ceph_copy_user_to_page_vector); int ceph_copy_to_page_vector(struct page **pages, - const char *data, + const void *data, loff_t off, size_t len) { int i = 0; @@ -143,7 +143,7 @@ int ceph_copy_to_page_vector(struct page **pages, EXPORT_SYMBOL(ceph_copy_to_page_vector); int ceph_copy_from_page_vector(struct page **pages, - char *data, + void *data, loff_t off, size_t len) { int i = 0; @@ -170,7 +170,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; -- cgit v1.2.3 From 903bb32e890237ca43ab847e561e5377cfe0fdb3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: drop return value from page vector copy routines The return values provided for ceph_copy_to_page_vector() and ceph_copy_from_page_vector() serve no purpose, so get rid of them. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 5 ++--- include/linux/ceph/libceph.h | 4 ++-- net/ceph/pagevec.c | 14 ++++++-------- 3 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c259b4089e95..b0eea3eaee93 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1890,8 +1890,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, if (ret < 0) goto out; ret = 0; - (void) ceph_copy_from_page_vector(pages, inbound, 0, - obj_request->xferred); + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); if (version) *version = obj_request->version; out: @@ -2089,7 +2088,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); size = (size_t) obj_request->xferred; - (void) ceph_copy_from_page_vector(pages, buf, 0, size); + ceph_copy_from_page_vector(pages, buf, 0, size); rbd_assert(size <= (size_t) INT_MAX); ret = (int) size; if (version) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2250f8bb2490..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -232,10 +232,10 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, +extern void ceph_copy_to_page_vector(struct page **pages, const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, +extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 5b20be979c19..815a2249cfa9 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, +void ceph_copy_to_page_vector(struct page **pages, const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, +void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); -- cgit v1.2.3 From 1c3e826482ab698e418c7a894440e62c76aac893 Mon Sep 17 00:00:00 2001 From: Sha Zhengju Date: Wed, 20 Feb 2013 17:14:38 +0800 Subject: sched/core: Remove the obsolete and unused nr_uninterruptible() function Signed-off-by: Sha Zhengju Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1361351678-8065-1-git-send-email-handai.szj@taobao.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/sched/core.c | 22 ++-------------------- 2 files changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33cc42130371..f9ca237df7e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -98,7 +98,6 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); -extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03d7784b7bd2..b7b03cd2d4cd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1969,11 +1969,10 @@ context_switch(struct rq *rq, struct task_struct *prev, } /* - * nr_running, nr_uninterruptible and nr_context_switches: + * nr_running and nr_context_switches: * * externally visible scheduler statistics: current number of runnable - * threads, current number of uninterruptible-sleeping threads, total - * number of context switches performed since bootup. + * threads, total number of context switches performed since bootup. */ unsigned long nr_running(void) { @@ -1985,23 +1984,6 @@ unsigned long nr_running(void) return sum; } -unsigned long nr_uninterruptible(void) -{ - unsigned long i, sum = 0; - - for_each_possible_cpu(i) - sum += cpu_rq(i)->nr_uninterruptible; - - /* - * Since we read the counters lockless, it might be slightly - * inaccurate. Do not allow it to go below zero though: - */ - if (unlikely((long)sum < 0)) - sum = 0; - - return sum; -} - unsigned long long nr_context_switches(void) { int i; -- cgit v1.2.3 From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 29 Jan 2013 06:04:50 +0000 Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h The header file will then be installed under /usr/include/linux so that userspace applications can refer to Btrfs ioctls by name and use the same structs used internally in the kernel. Signed-off-by: Filipe Brandenburger Signed-off-by: Josef Bacik --- fs/btrfs/backref.h | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/ioctl.h | 502 -------------------------------------------- fs/btrfs/qgroup.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.h | 2 +- include/linux/btrfs.h | 6 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/btrfs.h | 503 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 518 insertions(+), 510 deletions(-) delete mode 100644 fs/btrfs/ioctl.h create mode 100644 include/linux/btrfs.h create mode 100644 include/uapi/linux/btrfs.h (limited to 'include/linux') diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca79455..310a7f6d09b1 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -19,7 +19,7 @@ #ifndef __BTRFS_BACKREF__ #define __BTRFS_BACKREF__ -#include "ioctl.h" +#include #include "ulist.h" #include "extent_io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 541ce9a9949e..69321013683c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -31,10 +31,10 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" -#include "ioctl.h" struct btrfs_trans_handle; struct btrfs_transaction; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 083abca56055..13c78ea3ebce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,11 +30,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "tree-log.h" #include "locking.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60ec7589900c..fc8aa8bf80a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -39,12 +39,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "ordered-data.h" #include "xattr.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b554b47e814..96ecefc1724f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -42,12 +42,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "volumes.h" #include "locking.h" diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h deleted file mode 100644 index dabca9cc8c2e..000000000000 --- a/fs/btrfs/ioctl.h +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __IOCTL_ -#define __IOCTL_ -#include - -#define BTRFS_IOCTL_MAGIC 0x94 -#define BTRFS_VOL_NAME_MAX 255 - -/* this should be 4k */ -#define BTRFS_PATH_NAME_MAX 4087 -struct btrfs_ioctl_vol_args { - __s64 fd; - char name[BTRFS_PATH_NAME_MAX + 1]; -}; - -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 - -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_FSID_SIZE 16 -#define BTRFS_UUID_SIZE 16 - -#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) - -struct btrfs_qgroup_limit { - __u64 flags; - __u64 max_rfer; - __u64 max_excl; - __u64 rsv_rfer; - __u64 rsv_excl; -}; - -struct btrfs_qgroup_inherit { - __u64 flags; - __u64 num_qgroups; - __u64 num_ref_copies; - __u64 num_excl_copies; - struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; -}; - -struct btrfs_ioctl_qgroup_limit_args { - __u64 qgroupid; - struct btrfs_qgroup_limit lim; -}; - -#define BTRFS_SUBVOL_NAME_MAX 4039 -struct btrfs_ioctl_vol_args_v2 { - __s64 fd; - __u64 transid; - __u64 flags; - union { - struct { - __u64 size; - struct btrfs_qgroup_inherit __user *qgroup_inherit; - }; - __u64 unused[4]; - }; - char name[BTRFS_SUBVOL_NAME_MAX + 1]; -}; - -/* - * structure to report errors and progress to userspace, either as a - * result of a finished scrub, a canceled scrub or a progress inquiry - */ -struct btrfs_scrub_progress { - __u64 data_extents_scrubbed; /* # of data extents scrubbed */ - __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ - __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ - __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ - __u64 read_errors; /* # of read errors encountered (EIO) */ - __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata - * of a tree block did not match the - * expected values, like generation or - * logical */ - __u64 no_csum; /* # of 4k data block for which no csum - * is present, probably the result of - * data written with nodatasum */ - __u64 csum_discards; /* # of csum for which no data was found - * in the extent tree. */ - __u64 super_errors; /* # of bad super blocks encountered */ - __u64 malloc_errors; /* # of internal kmalloc errors. These - * will likely cause an incomplete - * scrub */ - __u64 uncorrectable_errors; /* # of errors where either no intact - * copy was found or the writeback - * failed */ - __u64 corrected_errors; /* # of errors corrected */ - __u64 last_physical; /* last physical address scrubbed. In - * case a scrub was aborted, this can - * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a - * full (64k) bio failed, but the re- - * check succeeded for each 4k piece. - * Intermittent error. */ -}; - -#define BTRFS_SCRUB_READONLY 1 -struct btrfs_ioctl_scrub_args { - __u64 devid; /* in */ - __u64 start; /* in */ - __u64 end; /* in */ - __u64 flags; /* in */ - struct btrfs_scrub_progress progress; /* out */ - /* pad to 1k */ - __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 -struct btrfs_ioctl_dev_replace_start_params { - __u64 srcdevid; /* in, if 0, use srcdev_name instead */ - __u64 cont_reading_from_srcdev_mode; /* in, see #define - * above */ - __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ - __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 -struct btrfs_ioctl_dev_replace_status_params { - __u64 replace_state; /* out, see #define above */ - __u64 progress_1000; /* out, 0 <= x <= 1000 */ - __u64 time_started; /* out, seconds since 1-Jan-1970 */ - __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ - __u64 num_write_errors; /* out */ - __u64 num_uncorrectable_read_errors; /* out */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 -struct btrfs_ioctl_dev_replace_args { - __u64 cmd; /* in */ - __u64 result; /* out */ - - union { - struct btrfs_ioctl_dev_replace_start_params start; - struct btrfs_ioctl_dev_replace_status_params status; - }; /* in/out */ - - __u64 spare[64]; -}; - -struct btrfs_ioctl_dev_info_args { - __u64 devid; /* in/out */ - __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ - __u64 bytes_used; /* out */ - __u64 total_bytes; /* out */ - __u64 unused[379]; /* pad to 4k */ - __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ -}; - -struct btrfs_ioctl_fs_info_args { - __u64 max_id; /* out */ - __u64 num_devices; /* out */ - __u8 fsid[BTRFS_FSID_SIZE]; /* out */ - __u64 reserved[124]; /* pad to 1k */ -}; - -/* balance control ioctl modes */ -#define BTRFS_BALANCE_CTL_PAUSE 1 -#define BTRFS_BALANCE_CTL_CANCEL 2 - -/* - * this is packed, because it should be exactly the same as its disk - * byte order counterpart (struct btrfs_disk_balance_args) - */ -struct btrfs_balance_args { - __u64 profiles; - __u64 usage; - __u64 devid; - __u64 pstart; - __u64 pend; - __u64 vstart; - __u64 vend; - - __u64 target; - - __u64 flags; - - __u64 unused[8]; -} __attribute__ ((__packed__)); - -/* report balance progress to userspace */ -struct btrfs_balance_progress { - __u64 expected; /* estimated # of chunks that will be - * relocated to fulfill the request */ - __u64 considered; /* # of chunks we have considered so far */ - __u64 completed; /* # of chunks relocated so far */ -}; - -#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) -#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) -#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) - -struct btrfs_ioctl_balance_args { - __u64 flags; /* in/out */ - __u64 state; /* out */ - - struct btrfs_balance_args data; /* in/out */ - struct btrfs_balance_args meta; /* in/out */ - struct btrfs_balance_args sys; /* in/out */ - - struct btrfs_balance_progress stat; /* out */ - - __u64 unused[72]; /* pad to 1k */ -}; - -#define BTRFS_INO_LOOKUP_PATH_MAX 4080 -struct btrfs_ioctl_ino_lookup_args { - __u64 treeid; - __u64 objectid; - char name[BTRFS_INO_LOOKUP_PATH_MAX]; -}; - -struct btrfs_ioctl_search_key { - /* which root are we searching. 0 is the tree of tree roots */ - __u64 tree_id; - - /* keys returned will be >= min and <= max */ - __u64 min_objectid; - __u64 max_objectid; - - /* keys returned will be >= min and <= max */ - __u64 min_offset; - __u64 max_offset; - - /* max and min transids to search for */ - __u64 min_transid; - __u64 max_transid; - - /* keys returned will be >= min and <= max */ - __u32 min_type; - __u32 max_type; - - /* - * how many items did userland ask for, and how many are we - * returning - */ - __u32 nr_items; - - /* align to 64 bits */ - __u32 unused; - - /* some extra for later */ - __u64 unused1; - __u64 unused2; - __u64 unused3; - __u64 unused4; -}; - -struct btrfs_ioctl_search_header { - __u64 transid; - __u64 objectid; - __u64 offset; - __u32 type; - __u32 len; -}; - -#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) -/* - * the buf is an array of search headers where - * each header is followed by the actual item - * the type field is expanded to 32 bits for alignment - */ -struct btrfs_ioctl_search_args { - struct btrfs_ioctl_search_key key; - char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; -}; - -struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; - -/* flags for the defrag range ioctl */ -#define BTRFS_DEFRAG_RANGE_COMPRESS 1 -#define BTRFS_DEFRAG_RANGE_START_IO 2 - -struct btrfs_ioctl_space_info { - __u64 flags; - __u64 total_bytes; - __u64 used_bytes; -}; - -struct btrfs_ioctl_space_args { - __u64 space_slots; - __u64 total_spaces; - struct btrfs_ioctl_space_info spaces[0]; -}; - -struct btrfs_data_container { - __u32 bytes_left; /* out -- bytes not needed to deliver output */ - __u32 bytes_missing; /* out -- additional bytes needed for result */ - __u32 elem_cnt; /* out */ - __u32 elem_missed; /* out */ - __u64 val[0]; /* out */ -}; - -struct btrfs_ioctl_ino_path_args { - __u64 inum; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *fspath; out */ - __u64 fspath; /* out */ -}; - -struct btrfs_ioctl_logical_ino_args { - __u64 logical; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *inodes; out */ - __u64 inodes; -}; - -enum btrfs_dev_stat_values { - /* disk I/O failure stats */ - BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ - - /* stats for indirect indications for I/O failures */ - BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or - * contents is illegal: this is an - * indication that the block was damaged - * during read or write, or written to - * wrong location or read from wrong - * location */ - BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not - * been written */ - - BTRFS_DEV_STAT_VALUES_MAX -}; - -/* Reset statistics after reading; needs SYS_ADMIN capability */ -#define BTRFS_DEV_STATS_RESET (1ULL << 0) - -struct btrfs_ioctl_get_dev_stats { - __u64 devid; /* in */ - __u64 nr_items; /* in/out */ - __u64 flags; /* in/out */ - - /* out values: */ - __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; - - __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ -}; - -#define BTRFS_QUOTA_CTL_ENABLE 1 -#define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 -struct btrfs_ioctl_quota_ctl_args { - __u64 cmd; - __u64 status; -}; - -struct btrfs_ioctl_qgroup_assign_args { - __u64 assign; - __u64 src; - __u64 dst; -}; - -struct btrfs_ioctl_qgroup_create_args { - __u64 create; - __u64 qgroupid; -}; -struct btrfs_ioctl_timespec { - __u64 sec; - __u32 nsec; -}; - -struct btrfs_ioctl_received_subvol_args { - char uuid[BTRFS_UUID_SIZE]; /* in */ - __u64 stransid; /* in */ - __u64 rtransid; /* out */ - struct btrfs_ioctl_timespec stime; /* in */ - struct btrfs_ioctl_timespec rtime; /* out */ - __u64 flags; /* in */ - __u64 reserved[16]; /* in */ -}; - -struct btrfs_ioctl_send_args { - __s64 send_fd; /* in */ - __u64 clone_sources_count; /* in */ - __u64 __user *clone_sources; /* in */ - __u64 parent_root; /* in */ - __u64 flags; /* in */ - __u64 reserved[4]; /* in */ -}; - -#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ - struct btrfs_ioctl_vol_args) -/* trans start and trans end are dangerous, and only for - * use by applications that know how to avoid the - * resulting deadlocks - */ -#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) -#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) -#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) - -#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) -#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ - struct btrfs_ioctl_vol_args) - -#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_clone_range_args) - -#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ - struct btrfs_ioctl_defrag_range_args) -#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ - struct btrfs_ioctl_search_args) -#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ - struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) -#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ - struct btrfs_ioctl_space_args) -#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) -#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) -#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) -#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) -#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) -#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ - struct btrfs_ioctl_dev_info_args) -#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ - struct btrfs_ioctl_fs_info_args) -#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) -#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ - struct btrfs_ioctl_received_subvol_args) -#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) -#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ - struct btrfs_ioctl_quota_ctl_args) -#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ - struct btrfs_ioctl_qgroup_assign_args) -#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ - struct btrfs_ioctl_qgroup_create_args) -#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ - struct btrfs_ioctl_qgroup_limit_args) -#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ - struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_dev_replace_args) - -#endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5c856234323..a0d6368249fa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -23,13 +23,13 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" #include "locking.h" #include "ulist.h" -#include "ioctl.h" #include "backref.h" /* TODO XXX FIXME diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67b373bf3ff9..6846ededfe95 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,13 +41,13 @@ #include #include #include +#include #include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "xattr.h" #include "volumes.h" diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3c3939ac751..12bb84166a5f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,8 +21,8 @@ #include #include +#include #include "async-thread.h" -#include "ioctl.h" #define BTRFS_STRIPE_LEN (64 * 1024) diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_BTRFS_H +#define _LINUX_BTRFS_H + +#include + +#endif /* _LINUX_BTRFS_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 19e765fbfef7..896ee1247294 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -68,6 +68,7 @@ header-y += blkpg.h header-y += blktrace_api.h header-y += bpqether.h header-y += bsg.h +header-y += btrfs.h header-y += can.h header-y += capability.h header-y += capi.h diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h new file mode 100644 index 000000000000..cffbb582dd90 --- /dev/null +++ b/include/uapi/linux/btrfs.h @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef _UAPI_LINUX_BTRFS_H +#define _UAPI_LINUX_BTRFS_H +#include +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +#define BTRFS_SUBVOL_NAME_MAX 4039 +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; + char name[BTRFS_SUBVOL_NAME_MAX + 1]; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. */ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u64 reserved[124]; /* pad to 1k */ +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + __u64 usage; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + __u64 unused[8]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +struct btrfs_ioctl_search_key { + /* which root are we searching. 0 is the tree of tree roots */ + __u64 tree_id; + + /* keys returned will be >= min and <= max */ + __u64 min_objectid; + __u64 max_objectid; + + /* keys returned will be >= min and <= max */ + __u64 min_offset; + __u64 max_offset; + + /* max and min transids to search for */ + __u64 min_transid; + __u64 max_transid; + + /* keys returned will be >= min and <= max */ + __u32 min_type; + __u32 max_type; + + /* + * how many items did userland ask for, and how many are we + * returning + */ + __u32 nr_items; + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +}; + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ +}; + +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) + +#endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Mon, 13 Aug 2012 17:25:44 +0200 Subject: lib/lzo: Update LZO compression to current upstream version This commit updates the kernel LZO code to the current upsteam version which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows a doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines. Signed-off-by: Markus F.X.J. Oberhumer --- include/linux/lzo.h | 15 +- lib/lzo/lzo1x_compress.c | 335 ++++++++++++++++++++++---------------- lib/lzo/lzo1x_decompress_safe.c | 350 +++++++++++++++++++--------------------- lib/lzo/lzodefs.h | 38 +++-- 4 files changed, 395 insertions(+), 343 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lzo.h b/include/linux/lzo.h index d793497ec1ca..a0848d9377e5 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -4,28 +4,28 @@ * LZO Public Kernel Interface * A mini subset of the LZO real-time data compression library * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); + unsigned char *dst, size_t *dst_len, void *wrkmem); /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len); + unsigned char *dst, size_t *dst_len); /* * Return values (< 0 = Error) @@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, #define LZO_E_EOF_NOT_FOUND (-7) #define LZO_E_INPUT_NOT_CONSUMED (-8) #define LZO_E_NOT_YET_IMPLEMENTED (-9) +#define LZO_E_INVALID_ARGUMENT (-10) #endif diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62e..236eb21167b5 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -1,194 +1,243 @@ /* - * LZO1X Compressor from MiniLZO + * LZO1X Compressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ #include #include -#include #include +#include #include "lzodefs.h" static noinline size_t -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len, void *wrkmem) +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + size_t ti, void *wrkmem) { + const unsigned char *ip; + unsigned char *op; const unsigned char * const in_end = in + in_len; - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; - const unsigned char ** const dict = wrkmem; - const unsigned char *ip = in, *ii = ip; - const unsigned char *end, *m, *m_pos; - size_t m_off, m_len, dindex; - unsigned char *op = out; + const unsigned char * const ip_end = in + in_len - 20; + const unsigned char *ii; + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; - ip += 4; + op = out; + ip = in; + ii = ip; + ip += ti < 4 ? 4 - ti : 0; for (;;) { - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - goto literal; - -try_match: - if (get_unaligned((const unsigned short *)m_pos) - == get_unaligned((const unsigned short *)ip)) { - if (likely(m_pos[2] == ip[2])) - goto match; - } - + const unsigned char *m_pos; + size_t t, m_len, m_off; + u32 dv; literal: - dict[dindex] = ip; - ++ip; + ip += 1 + ((ip - ii) >> 5); +next: if (unlikely(ip >= ip_end)) break; - continue; - -match: - dict[dindex] = ip; - if (ip != ii) { - size_t t = ip - ii; + dv = get_unaligned_le32(ip); + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + ii -= ti; + ti = 0; + t = ip - ii; + if (t != 0) { if (t <= 3) { op[-2] |= t; - } else if (t <= 18) { + COPY4(op, ii); + op += t; + } else if (t <= 16) { *op++ = (t - 3); + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += t; } else { - size_t tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; + if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; *op++ = 0; + while (unlikely(tt > 255)) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; } - *op++ = tt; + do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { + *op++ = *ii++; + } while (--t > 0); } - do { - *op++ = *ii++; - } while (--t > 0); } - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ - || m_pos[5] != *ip++ || m_pos[6] != *ip++ - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { - --ip; - m_len = ip - ii; + m_len = 4; + { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) + u64 v; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 8; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctzll(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clzll(v) / 8; +# else +# error "missing endian definition" +# endif +#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) + u32 v; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (v != 0) + break; + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctz(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clz(v) / 8; +# else +# error "missing endian definition" +# endif +#else + if (unlikely(ip[m_len] == m_pos[m_len])) { + do { + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } +#endif + } +m_len_done: - if (m_off <= M2_MAX_OFFSET) { - m_off -= 1; - *op++ = (((m_len - 1) << 5) - | ((m_off & 7) << 2)); - *op++ = (m_off >> 3); - } else if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; + m_off = ip - m_pos; + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= M3_MAX_LEN) *op++ = (M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else { - m_off -= 0x4000; - - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) - | (m_len - 2)); - goto m3_m4_offset; + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; + } + *op++ = (m_len); } + *op++ = (m_off << 2); + *op++ = (m_off >> 6); } else { - end = in_end; - m = m_pos + M2_MAX_LEN + 1; - - while (ip < end && *m == *ip) { - m++; - ip++; - } - m_len = ip - ii; - - if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - if (m_len <= 33) { - *op++ = (M3_MARKER | (m_len - 2)); - } else { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } else { - m_off -= 0x4000; - if (m_len <= M4_MAX_LEN) { - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11) + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); - } else { - m_len -= M4_MAX_LEN; - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - - *op++ = (m_len); + else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; } + *op++ = (m_len); } -m3_m4_offset: - *op++ = ((m_off & 63) << 2); + *op++ = (m_off << 2); *op++ = (m_off >> 6); } - - ii = ip; - if (unlikely(ip >= ip_end)) - break; + goto next; } - *out_len = op - out; - return in_end - ii; + return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, - size_t *out_len, void *wrkmem) +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) { - const unsigned char *ii; + const unsigned char *ip = in; unsigned char *op = out; - size_t t; + size_t l = in_len; + size_t t = 0; - if (unlikely(in_len <= M2_MAX_LEN + 5)) { - t = in_len; - } else { - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + while (l > 20) { + size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + uintptr_t ll_end = (uintptr_t) ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end) + break; + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + ip += ll; op += *out_len; + l -= ll; } + t += l; if (t > 0) { - ii = in + in_len - t; + const unsigned char *ii = in + in_len - t; if (op == out && t <= 238) { *op++ = (17 + t); @@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, *op++ = (t - 3); } else { size_t tt = t - 18; - *op++ = 0; while (tt > 255) { tt -= 255; *op++ = 0; } - *op++ = tt; } - do { + if (t >= 16) do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { *op++ = *ii++; } while (--t > 0); } @@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); - diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index f2fd09850223..569985d522d5 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -1,12 +1,12 @@ /* - * LZO1X Decompressor from MiniLZO + * LZO1X Decompressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ @@ -15,225 +15,207 @@ #include #include #endif - #include #include #include "lzodefs.h" -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) - -#define COPY4(dst, src) \ - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len) + unsigned char *out, size_t *out_len) { + unsigned char *op; + const unsigned char *ip; + size_t t, next; + size_t state = 0; + const unsigned char *m_pos; const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; - const unsigned char *ip = in, *m_pos; - unsigned char *op = out; - size_t t; - *out_len = 0; + op = out; + ip = in; + if (unlikely(in_len < 3)) + goto input_overrun; if (*ip > 17) { t = *ip++ - 17; - if (t < 4) + if (t < 4) { + next = t; goto match_next; - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - do { - *op++ = *ip++; - } while (--t > 0); - goto first_literal_run; - } - - while ((ip < ip_end)) { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 15 + *ip++; - } - if (HAVE_OP(t + 3, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 4, ip_end, ip)) - goto input_overrun; - - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while (t >= 4); - if (t > 0) { - do { - *op++ = *ip++; - } while (--t > 0); - } - } else { - do { - *op++ = *ip++; - } while (--t > 0); - } } + goto copy_literal_run; + } -first_literal_run: + for (;;) { t = *ip++; - if (t >= 16) - goto match; - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - - if (HAVE_OP(3, op_end, op)) - goto output_overrun; - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; - - goto match_done; - - do { -match: - if (t >= 64) { - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - goto copy_match; - } else if (t >= 32) { - t &= 31; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { + if (t < 16) { + if (likely(state == 0)) { + if (unlikely(t == 0)) { + while (unlikely(*ip == 0)) { t += 255; ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; + NEED_IP(1); } - t += 31 + *ip++; + t += 15 + *ip++; } - m_pos = op - 1; - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - } else if (t >= 16) { - m_pos = op; - m_pos -= (t & 8) << 11; - - t &= 7; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 7 + *ip++; + t += 3; +copy_literal_run: +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { + const unsigned char *ie = ip + t; + unsigned char *oe = op + t; + do { + COPY8(op, ip); + op += 8; + ip += 8; + COPY8(op, ip); + op += 8; + ip += 8; + } while (ip < ie); + ip = ie; + op = oe; + } else +#endif + { + NEED_OP(t); + NEED_IP(t + 3); + do { + *op++ = *ip++; + } while (--t > 0); } - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; - } else { + state = 4; + continue; + } else if (state != 4) { + next = t & 3; m_pos = op - 1; m_pos -= t >> 2; m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(2, op_end, op)) - goto output_overrun; - - *op++ = *m_pos++; - *op++ = *m_pos; - goto match_done; + TEST_LB(m_pos); + NEED_OP(2); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + goto match_next; + } else { + next = t & 3; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + t = 3; } - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4 - (3 - 1); + } else if (t >= 64) { + next = t & 3; + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1 + (3 - 1); + } else if (t >= 32) { + t = (t & 31) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + NEED_IP(2); + } + m_pos = op - 1; + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + NEED_IP(2); + } + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } + TEST_LB(m_pos); +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (op - m_pos >= 8) { + unsigned char *oe = op + t; + if (likely(HAVE_OP(t + 15))) { do { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do { - *op++ = *m_pos++; - } while (--t > 0); + COPY8(op, m_pos); + op += 8; + m_pos += 8; + COPY8(op, m_pos); + op += 8; + m_pos += 8; + } while (op < oe); + op = oe; + if (HAVE_IP(6)) { + state = next; + COPY4(op, ip); + op += next; + ip += next; + continue; + } } else { -copy_match: - *op++ = *m_pos++; - *op++ = *m_pos++; + NEED_OP(t); do { *op++ = *m_pos++; - } while (--t > 0); + } while (op < oe); } -match_done: - t = ip[-2] & 3; - if (t == 0) - break; + } else +#endif + { + unsigned char *oe = op + t; + NEED_OP(t); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + m_pos += 2; + do { + *op++ = *m_pos++; + } while (op < oe); + } match_next: - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - - *op++ = *ip++; - if (t > 1) { + state = next; + t = next; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (likely(HAVE_IP(6) && HAVE_OP(4))) { + COPY4(op, ip); + op += t; + ip += t; + } else +#endif + { + NEED_IP(t + 3); + NEED_OP(t); + while (t > 0) { *op++ = *ip++; - if (t > 2) - *op++ = *ip++; + t--; } - - t = *ip++; - } while (ip < ip_end); + } } - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; - eof_found: *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + return (t != 3 ? LZO_E_ERROR : + ip == ip_end ? LZO_E_OK : + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); + input_overrun: *out_len = op - out; return LZO_E_INPUT_OVERRUN; diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index b6d482c492ef..6710b83ce72e 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -1,19 +1,37 @@ /* * lzodefs.h -- architecture, OS and compiler specific defines * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO_VERSION 0x2020 -#define LZO_VERSION_STRING "2.02" -#define LZO_VERSION_DATE "Oct 17 2005" + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#if defined(__x86_64__) +#define COPY8(dst, src) \ + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) +#else +#define COPY8(dst, src) \ + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) +#endif + +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) +#error "conflicting endian definitions" +#elif defined(__x86_64__) +#define LZO_USE_CTZ64 1 +#define LZO_USE_CTZ32 1 +#elif defined(__i386__) || defined(__powerpc__) +#define LZO_USE_CTZ32 1 +#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#define LZO_USE_CTZ32 1 +#endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 @@ -34,10 +52,8 @@ #define M3_MARKER 32 #define M4_MARKER 16 -#define D_BITS 14 -#define D_MASK ((1u << D_BITS) - 1) +#define lzo_dict_t unsigned short +#define D_BITS 13 +#define D_SIZE (1u << D_BITS) +#define D_MASK (D_SIZE - 1) #define D_HIGH ((D_MASK >> 1) + 1) - -#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ - << (s1)) ^ (p)[0]) -#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) -- cgit v1.2.3 From 52608ba20546139dc76cca8a46c1d901455d5450 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Wed, 8 Aug 2012 12:30:56 -0300 Subject: i5100_edac: probe for device 19 function 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probe and store the device handle for the device 19 function 0 during driver initialization. The device is used during fault injection. Signed-off-by: Niklas Söderlund Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/i5100_edac.c | 28 +++++++++++++++++++++++++++- include/linux/pci_ids.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2cc99f..33c5c8e663f2 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -338,6 +338,7 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ @@ -869,7 +870,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +942,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +965,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -999,6 +1017,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1036,8 +1060,10 @@ static void i5100_remove_one(struct pci_dev *pdev) pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0eb65796bcb9..d0d1e801e350 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2776,6 +2776,7 @@ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 -- cgit v1.2.3 From c66b5a79a9348ccd6d1cd81416027d0e12da965d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 15 Feb 2013 07:21:08 -0300 Subject: edac: add a new memory layer type There are some cases where the memory controller layout is completely hidden. This is the case of firmware-driven error code, like the one provided by GHES. Add a new layer to be used on such memory error report mechanisms. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/edac_mc.c | 1 + include/linux/edac.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index fb219bc5cb2c..78d8c7d6e76a 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -900,6 +900,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); diff --git a/include/linux/edac.h b/include/linux/edac.h index 4784213c819d..1b7744c219b8 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -375,6 +375,9 @@ enum scrub_type { * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" + * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped + * as a single memory area. This is used when + * retrieving errors from a firmware driven driver. * * This enum is used by the drivers to tell edac_mc_sysfs what name should * be used when describing a memory stick location. @@ -384,6 +387,7 @@ enum edac_mc_layer_type { EDAC_MC_LAYER_CHANNEL, EDAC_MC_LAYER_SLOT, EDAC_MC_LAYER_CHIP_SELECT, + EDAC_MC_LAYER_ALL_MEM, }; /** -- cgit v1.2.3 From c2c93dbc97622e26dc19edc71e50ebaa996d7804 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 06:50:05 -0300 Subject: edac: remove proc_name from mci structure proc_name isn't used anywhere. Remove it. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b7744c219b8..ff18efc754f3 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -48,7 +48,6 @@ static inline void opstate_init(void) } #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 /** * enum dev_type - describe the type of memory DRAM chips used at the stick @@ -633,7 +632,6 @@ struct mem_ctl_info { const char *mod_ver; const char *ctl_name; const char *dev_name; - char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; unsigned long start_time; /* mci load start time (in jiffies) */ -- cgit v1.2.3 From c7ef7645544131b0750478d1cf94cdfa945c809d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 21 Feb 2013 13:36:45 -0300 Subject: edac: reduce stack pressure by using a pre-allocated buffer The number of variables at the stack is too big. Reduces the stack usage by using a pre-allocated error buffer. Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/edac_mc.c | 81 ++++++++++++++++++++++++++++++-------------------- include/linux/edac.h | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 34eb9703ed33..4f18dd755939 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " /** * edac_mc_handle_error - reports a memory event to userspace @@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; + char detail[80]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, other_detail); /* Memory type dependent details about the error */ if (type == HW_EVENT_ERR_CORRECTED) { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, + e->label, detail, other_detail, + e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, + e->grain); } else { snprintf(detail, sizeof(detail), "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); + page_frame_number, offset_in_page, e->grain); - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, + e->label, detail, other_detail, + e->enable_per_layer_report); } } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/include/linux/edac.h b/include/linux/edac.h index ff18efc754f3..096b7fcdf484 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -47,8 +47,18 @@ static inline void opstate_init(void) return; } +/* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 +/* Maximum size of the location string */ +#define LOCATION_SIZE 80 + +/* Defines the maximum number of labels that can be reported */ +#define EDAC_MAX_LABELS 8 + +/* String used to join two or more labels */ +#define OTHER_LABEL " or " + /** * enum dev_type - describe the type of memory DRAM chips used at the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it @@ -553,6 +563,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; +/** + * edac_raw_error_desc - Raw error report structure + * @grain: minimum granularity for an error report, in bytes + * @error_count: number of errors of the same type + * @top_layer: top layer of the error (layer[0]) + * @mid_layer: middle layer of the error (layer[1]) + * @low_layer: low layer of the error (layer[2]) + * @page_frame_number: page where the error happened + * @offset_in_page: page offset + * @syndrome: syndrome of the error (or 0 if unknown or if + * the syndrome is not applicable) + * @msg: error message + * @location: location of the error + * @label: label of the affected DIMM(s) + * @other_detail: other driver-specific detail about the error + * @enable_per_layer_report: if false, the error affects all layers + * (typically, a memory controller error) + */ +struct edac_raw_error_desc { + /* + * NOTE: everything before grain won't be cleaned by + * edac_raw_error_desc_clean() + */ + char location[LOCATION_SIZE]; + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; + long grain; + + /* the vars below and grain will be cleaned on every new error report */ + u16 error_count; + int top_layer; + int mid_layer; + int low_layer; + unsigned long page_frame_number; + unsigned long offset_in_page; + unsigned long syndrome; + const char *msg; + const char *other_detail; + bool enable_per_layer_report; +}; + /* MEMORY controller information structure */ struct mem_ctl_info { @@ -660,6 +710,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; + /* + * Used to report an error - by being at the global struct + * makes the memory allocated by the EDAC core + */ + struct edac_raw_error_desc error_desc; + /* the internal state of this controller instance */ int op_state; -- cgit v1.2.3 From 8dd93d450bff251575c56b8f058393124e1f00fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 21:26:22 -0300 Subject: edac: add support for error type "Info" The CPER spec defines a forth type of error: informational logs. Add support for it at the edac API and at the trace event interface. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 16 ++++++++++++++++ include/ras/ras_event.h | 4 +--- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 096b7fcdf484..4fd4999ccb5b 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -109,8 +109,24 @@ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, HW_EVENT_ERR_UNCORRECTED, HW_EVENT_ERR_FATAL, + HW_EVENT_ERR_INFO, }; +static inline char *mc_event_error_type(const unsigned int err_type) +{ + switch (err_type) { + case HW_EVENT_ERR_CORRECTED: + return "Corrected"; + case HW_EVENT_ERR_UNCORRECTED: + return "Uncorrected"; + case HW_EVENT_ERR_FATAL: + return "Fatal"; + default: + case HW_EVENT_ERR_INFO: + return "Info"; + } +} + /** * enum mem_type - memory types. For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 260470e72483..21cdb0b7b0fb 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -78,9 +78,7 @@ TRACE_EVENT(mc_event, TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", __entry->error_count, - (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" : - ((__entry->error_type == HW_EVENT_ERR_FATAL) ? - "Fatal" : "Uncorrected"), + mc_event_error_type(__entry->error_type), __entry->error_count > 1 ? "s" : "", ((char *)__get_str(msg))[0] ? " " : "", __get_str(msg), -- cgit v1.2.3 From aee38fadd25989c3e6d99fc08752e2d87601ffc1 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:07 +0000 Subject: IB/mlx4_ib: Remove local invalidate segment unused fields Remove unused fields from the local invalidate WQE segment structure. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 6 ++---- include/linux/mlx4/qp.h | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 19e0637220b9..c6dde71b4642 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1983,10 +1983,8 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { - iseg->flags = 0; - iseg->mem_key = cpu_to_be32(rkey); - iseg->guest_id = 0; - iseg->pa = 0; + memset(iseg, 0, sizeof(*iseg)); + iseg->mem_key = cpu_to_be32(rkey); } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 4b4ad6ffef92..6c8a68c602be 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -304,12 +304,10 @@ struct mlx4_wqe_fmr_ext_seg { }; struct mlx4_wqe_local_inval_seg { - __be32 flags; - u32 reserved1; + u64 reserved1; __be32 mem_key; - u32 reserved2[2]; - __be32 guest_id; - __be64 pa; + u32 reserved2; + u64 reserved3[2]; }; struct mlx4_wqe_raddr_seg { -- cgit v1.2.3 From 61083720702a329ed5952e32bda384e3bbc9093c Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:09 +0000 Subject: mlx4_core: Propagate MR deregistration failures to caller MR deregistration fails when memory windows are bound to the MR. Handle such failures by propagating them to the caller ULP. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mr.c | 13 ++++++++----- drivers/net/ethernet/mellanox/mlx4/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mr.c | 29 +++++++++++++++++++++------- include/linux/mlx4/device.h | 2 +- 4 files changed, 33 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index bbaf6176f207..254e1cf26439 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -68,7 +68,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); @@ -163,7 +163,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); @@ -177,8 +177,11 @@ err_free: int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); + int ret; - mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (ret) + return ret; if (mr->umem) ib_umem_release(mr->umem); kfree(mr); @@ -212,7 +215,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, return &mr->ibmr; err_mr: - mlx4_mr_free(dev->dev, &mr->mmr); + (void) mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); @@ -291,7 +294,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, return &fmr->ibfmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 3a2b8c65642d..a2987142734d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -176,7 +176,7 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); @@ -283,7 +283,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) return mdev; err_mr: - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); err_map: if (!mdev->uar_map) iounmap(mdev->uar_map); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 49705cf860c6..06b16e4ace80 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -442,7 +442,7 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); -static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) +static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; @@ -450,20 +450,31 @@ static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); - if (err) - mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d),", err); + mlx4_warn(dev, "MR has MWs bound to it.\n"); + return err; + } mr->enabled = MLX4_MPT_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); + + return 0; } -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { - mlx4_mr_free_reserved(dev, mr); + int ret; + + ret = mlx4_mr_free_reserved(dev, mr); + if (ret) + return ret; if (mr->enabled) mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); mlx4_mpt_release(dev, key_to_hw_index(mr->key)); + + return 0; } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -831,7 +842,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return 0; err_free: - mlx4_mr_free(dev, &fmr->mr); + (void) mlx4_mr_free(dev, &fmr->mr); return err; } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); @@ -888,10 +899,14 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { + int ret; + if (fmr->maps) return -EBUSY; - mlx4_mr_free(dev, &fmr->mr); + ret = mlx4_mr_free(dev, &fmr->mr); + if (ret) + return ret; fmr->mr.enabled = MLX4_MPT_DISABLED; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 20ea939c22a6..e9fe8caaf8bb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -801,7 +801,7 @@ u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); -- cgit v1.2.3 From 6640dfdf6fff387c0a8f7fb8ac1d47c6b093484e Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:32 -0800 Subject: compiler-gcc4.h: Reorder macros based upon gcc ver This helps to keep the file from getting confusing, removes one duplicate version check and should encourage future editors to put new macros where they belong. Signed-off-by: Daniel Santos Acked-by: David Rientjes Acked-by: Borislav Petkov Cc: Andi Kleen Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 662fd1b4c42a..c9785c22744e 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -13,6 +13,10 @@ #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) +#if __GNUC_MINOR__ > 0 +# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif + #if __GNUC_MINOR__ >= 3 /* Mark functions as cold. gcc will assume any path leading to a call to them will be unlikely. This means a lot of manual unlikely()s @@ -33,6 +37,12 @@ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ +#endif /* __GNUC_MINOR__ >= 3 */ + #if __GNUC_MINOR__ >= 5 /* * Mark a position in code as unreachable. This can be used to @@ -48,8 +58,7 @@ /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__)) -#endif -#endif +#endif /* __GNUC_MINOR__ >= 5 */ #if __GNUC_MINOR__ >= 6 /* @@ -58,13 +67,6 @@ #define __visible __attribute__((externally_visible)) #endif -#if __GNUC_MINOR__ > 0 -#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#endif -#if __GNUC_MINOR__ >= 3 && !defined(__CHECKER__) -#define __compiletime_warning(message) __attribute__((warning(message))) -#define __compiletime_error(message) __attribute__((error(message))) -#endif #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if __GNUC_MINOR__ >= 4 -- cgit v1.2.3 From 3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:39 -0800 Subject: compiler-gcc.h: Add gcc-recommended GCC_VERSION macro Throughout compiler*.h, many version checks are made. These can be simplified by using the macro that gcc's documentation recommends. However, my primary reason for adding this is that I need bug-check macros that are enabled at certain gcc versions and it's cleaner to use this macro than the tradition method: #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ => 2) If you add patch level, it gets this ugly: #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ > 2 || \ __GNUC_MINOR__ == 2 __GNUC_PATCHLEVEL__ >= 1)) As opposed to: #if GCC_VERSION >= 40201 While having separate headers for gcc 3 & 4 eliminates some of this verbosity, they can still be cleaned up by this. See also: http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html Signed-off-by: Daniel Santos Acked-by: Borislav Petkov Acked-by: David Rientjes Cc: Andi Kleen Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 6a6d7aefe12d..24545cd90a25 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -5,6 +5,9 @@ /* * Common definitions for all gcc versions go here. */ +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) /* Optimization barrier */ -- cgit v1.2.3 From 733ed6e43756b0aec25c9429b810ba74e24c980c Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:41 -0800 Subject: compiler-gcc{3,4}.h: Use GCC_VERSION macro Using GCC_VERSION reduces complexity, is easier to read and is GCC's recommended mechanism for doing version checks. (Just don't ask me why they didn't define it in the first place.) This also makes it easy to merge compiler-gcc{,3,4}.h should somebody want to. Signed-off-by: Daniel Santos Acked-by: David Rientjes Acked-by: Borislav Petkov Cc: Andi Kleen Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc3.h | 8 ++++---- include/linux/compiler-gcc4.h | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index 37d412436d0f..7d89febe4d79 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -2,22 +2,22 @@ #error "Please don't include directly, include instead." #endif -#if __GNUC_MINOR__ < 2 +#if GCC_VERSION < 30200 # error Sorry, your compiler is too old - please upgrade it. #endif -#if __GNUC_MINOR__ >= 3 +#if GCC_VERSION >= 30300 # define __used __attribute__((__used__)) #else # define __used __attribute__((__unused__)) #endif -#if __GNUC_MINOR__ >= 4 +#if GCC_VERSION >= 30400 #define __must_check __attribute__((warn_unused_result)) #endif #ifdef CONFIG_GCOV_KERNEL -# if __GNUC_MINOR__ < 4 +# if GCC_VERSION < 30400 # error "GCOV profiling support for gcc versions below 3.4 not included" # endif /* __GNUC_MINOR__ */ #endif /* CONFIG_GCOV_KERNEL */ diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index c9785c22744e..a9ffdfe7713c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -4,7 +4,7 @@ /* GCC 4.1.[01] miscompiles __weak */ #ifdef __KERNEL__ -# if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1 +# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 # error Your version of gcc miscompiles the __weak directive # endif #endif @@ -13,11 +13,11 @@ #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) -#if __GNUC_MINOR__ > 0 +#if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #endif -#if __GNUC_MINOR__ >= 3 +#if GCC_VERSION >= 40300 /* Mark functions as cold. gcc will assume any path leading to a call to them will be unlikely. This means a lot of manual unlikely()s are unnecessary now for any paths leading to the usual suspects @@ -41,9 +41,9 @@ # define __compiletime_warning(message) __attribute__((warning(message))) # define __compiletime_error(message) __attribute__((error(message))) #endif /* __CHECKER__ */ -#endif /* __GNUC_MINOR__ >= 3 */ +#endif /* GCC_VERSION >= 40300 */ -#if __GNUC_MINOR__ >= 5 +#if GCC_VERSION >= 40500 /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -58,9 +58,9 @@ /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__)) -#endif /* __GNUC_MINOR__ >= 5 */ +#endif /* GCC_VERSION >= 40500 */ -#if __GNUC_MINOR__ >= 6 +#if GCC_VERSION >= 40600 /* * Tell the optimizer that something else uses this function or variable. */ @@ -69,11 +69,11 @@ #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#if __GNUC_MINOR__ >= 4 +#if GCC_VERSION >= 40400 #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ #endif -#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) +#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) #define __HAVE_BUILTIN_BSWAP16__ #endif -#endif +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ -- cgit v1.2.3 From 6ae8d04871f84d853673e9e9f3f713e77a2fe145 Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:42 -0800 Subject: compiler{,-gcc4}.h, bug.h: Remove duplicate macros __linktime_error() does the same thing as __compiletime_error() and is only used in bug.h. Since the macro defines a function attribute that will cause a failure at compile-time (not link-time), it makes more sense to keep __compiletime_error(), which is also neatly mated with __compiletime_warning(). Signed-off-by: Daniel Santos Acked-by: David Rientjes Acked-by: Borislav Petkov Cc: Andi Kleen Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 2 +- include/linux/compiler-gcc4.h | 2 -- include/linux/compiler.h | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index b1cf40de847e..2a11774c5e64 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -74,7 +74,7 @@ extern int __build_bug_on_failed; #define BUILD_BUG() \ do { \ extern void __build_bug_failed(void) \ - __linktime_error("BUILD_BUG failed"); \ + __compiletime_error("BUILD_BUG failed");\ __build_bug_failed(); \ } while (0) diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a9ffdfe7713c..68b162d92254 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -33,8 +33,6 @@ the kernel context */ #define __cold __attribute__((__cold__)) -#define __linktime_error(message) __attribute__((__error__(message))) - #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) #ifndef __CHECKER__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index dd852b73b286..4c638be76093 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -308,9 +308,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_error # define __compiletime_error(message) #endif -#ifndef __linktime_error -# define __linktime_error(message) -#endif /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From ca623c914e82c3351cd657073fdb24a1df8c27b9 Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:44 -0800 Subject: bug.h: fix BUILD_BUG_ON macro in __CHECKER__ When __CHECKER__ is defined, we disable all of the BUILD_BUG.* macros. However, both BUILD_BUG_ON_NOT_POWER_OF_2 and BUILD_BUG_ON was evaluating to nothing in this case, and we want (0) since this is a function-like macro that will be followed by a semicolon. Signed-off-by: Daniel Santos Acked-by: Borislav Petkov Cc: Andi Kleen Cc: David Rientjes Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 2a11774c5e64..27d404f91b3e 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -12,11 +12,11 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ -#define BUILD_BUG_ON_NOT_POWER_OF_2(n) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON_INVALID(e) (0) -#define BUILD_BUG_ON(condition) +#define BUILD_BUG_ON(condition) (0) #define BUILD_BUG() (0) #else /* __CHECKER__ */ -- cgit v1.2.3 From 1d6a0d19c85587581a364850b77f30446810a560 Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:45 -0800 Subject: bug.h: prevent double evaulation of `condition' in BUILD_BUG_ON When calling BUILD_BUG_ON in an optimized build using gcc 4.3 and later, the condition will be evaulated twice, possibily with side-effects. This patch eliminates that error. [akpm@linux-foundation.org: tweak code layout] Signed-off-by: Daniel Santos Cc: Andi Kleen Cc: Borislav Petkov Cc: David Rientjes Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 27d404f91b3e..89fb91d0c929 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -59,9 +59,10 @@ struct pt_regs; extern int __build_bug_on_failed; #define BUILD_BUG_ON(condition) \ do { \ - ((void)sizeof(char[1 - 2*!!(condition)])); \ - if (condition) __build_bug_on_failed = 1; \ - } while(0) + bool __cond = !!(condition); \ + ((void)sizeof(char[1 - 2 * __cond])); \ + if (__cond) __build_bug_on_failed = 1; \ + } while (0) #endif /** -- cgit v1.2.3 From a3ccc497cd17147713363a4bf975f1a269fadb6d Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:52 -0800 Subject: bug.h: make BUILD_BUG_ON generate compile-time error Negative sized arrays wont create a compile-time error in some cases starting with gcc 4.4 (e.g., inlined functions), but gcc 4.3 introduced the error function attribute that will. This patch modifies BUILD_BUG_ON to behave like BUILD_BUG already does, using the error function attribute so that you don't have to build the entire kernel to discover that you have a problem, and then enjoy trying to track it down from a link-time error. Also, we are only including asm/bug.h and then expecting that linux/compiler.h will eventually be included to define __linktime_error (used in BUILD_BUG_ON). This patch includes it directly for clarity and to avoid the possibility of changes in /*/include/asm/bug.h being changed or not including linux/compiler.h for some reason. Signed-off-by: Daniel Santos Acked-by: Borislav Petkov Cc: Andi Kleen Cc: David Rientjes Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 89fb91d0c929..73af37ca472c 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -2,6 +2,7 @@ #define _LINUX_BUG_H #include +#include enum bug_trap_type { BUG_TRAP_TYPE_NONE = 0, @@ -43,25 +44,30 @@ struct pt_regs; * @condition: the condition which the compiler should know is false. * * If you have some code which relies on certain constants being equal, or - * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to * detect if someone changes it. * - * The implementation uses gcc's reluctance to create a negative array, but - * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments - * to inline functions). So as a fallback we use the optimizer; if it can't - * prove the condition is false, it will cause a link error on the undefined - * "__build_bug_on_failed". This error message can be harder to track down - * though, hence the two different methods. + * The implementation uses gcc's reluctance to create a negative array, but gcc + * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to + * inline functions). Luckily, in 4.3 they added the "error" function + * attribute just for this type of case. Thus, we use a negative sized array + * (should always create an error on gcc versions older than 4.4) and then call + * an undefined function with the error attribute (should always create an + * error on gcc 4.3 and later). If for some reason, neither creates a + * compile-time error, we'll still have a link-time error, which is harder to + * track down. */ #ifndef __OPTIMIZE__ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #else -extern int __build_bug_on_failed; -#define BUILD_BUG_ON(condition) \ - do { \ - bool __cond = !!(condition); \ - ((void)sizeof(char[1 - 2 * __cond])); \ - if (__cond) __build_bug_on_failed = 1; \ +#define BUILD_BUG_ON(condition) \ + do { \ + bool __cond = !!(condition); \ + extern void __build_bug_on_failed(void) \ + __compiletime_error("BUILD_BUG_ON failed"); \ + if (__cond) \ + __build_bug_on_failed(); \ + ((void)sizeof(char[1 - 2 * __cond])); \ } while (0) #endif -- cgit v1.2.3 From c361d3e54364d19bb5e803d6e766e94674da7b0e Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:54 -0800 Subject: compiler.h, bug.h: prevent double error messages with BUILD_BUG{,_ON} Prior to the introduction of __attribute__((error("msg"))) in gcc 4.3, creating compile-time errors required a little trickery. BUILD_BUG{,_ON} uses this attribute when available to generate compile-time errors, but also uses the negative-sized array trick for older compilers, resulting in two error messages in some cases. The reason it's "some" cases is that as of gcc 4.4, the negative-sized array will not create an error in some situations, like inline functions. This patch replaces the negative-sized array code with the new __compiletime_error_fallback() macro which expands to the same thing unless the the error attribute is available, in which case it expands to do{}while(0), resulting in exactly one compile-time error on all versions of gcc. Note that we are not changing the negative-sized array code for the unoptimized version of BUILD_BUG_ON, since it has the potential to catch problems that would be disabled in later versions of gcc were __compiletime_error_fallback used. The reason is that that an unoptimized build can't always remove calls to an error-attributed function call (like we are using) that should effectively become dead code if it were optimized. However, using a negative-sized array with a similar value will not result in an false-positive (error). The only caveat being that it will also fail to catch valid conditions, which we should be expecting in an unoptimized build anyway. Signed-off-by: Daniel Santos Cc: Andi Kleen Cc: Borislav Petkov Cc: David Rientjes Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 2 +- include/linux/compiler.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 73af37ca472c..dc11dc762fc3 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -67,7 +67,7 @@ struct pt_regs; __compiletime_error("BUILD_BUG_ON failed"); \ if (__cond) \ __build_bug_on_failed(); \ - ((void)sizeof(char[1 - 2 * __cond])); \ + __compiletime_error_fallback(__cond); \ } while (0) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4c638be76093..423bb6bd660f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -307,7 +307,12 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #endif #ifndef __compiletime_error # define __compiletime_error(message) +# define __compiletime_error_fallback(condition) \ + do { ((void)sizeof(char[1 - 2*!!(condition)])); } while (0) +#else +# define __compiletime_error_fallback(condition) do { } while (0) #endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From 9a8ab1c39970a4938a72d94e6fd13be88a797590 Mon Sep 17 00:00:00 2001 From: Daniel Santos Date: Thu, 21 Feb 2013 16:41:55 -0800 Subject: bug.h, compiler.h: introduce compiletime_assert & BUILD_BUG_ON_MSG Introduce compiletime_assert to compiler.h, which moves the details of how to break a build and emit an error message for a specific compiler to the headers where these details should be. Following in the tradition of the POSIX assert macro, compiletime_assert creates a build-time error when the supplied condition is *false*. Next, we add BUILD_BUG_ON_MSG to bug.h which simply wraps compiletime_assert, inverting the logic, so that it fails when the condition is *true*, consistent with the language "build bug on." This macro allows you to specify the error message you want emitted when the supplied condition is true. Finally, we remove all other code from bug.h that mucks with these details (BUILD_BUG & BUILD_BUG_ON), and have them all call BUILD_BUG_ON_MSG. This not only reduces source code bloat, but also prevents the possibility of code being changed for one macro and not for the other (which was previously the case for BUILD_BUG and BUILD_BUG_ON). Since __compiletime_error_fallback is now only used in compiler.h, I'm considering it a private macro and removing the double negation that's now extraneous. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Daniel Santos Cc: Andi Kleen Cc: Borislav Petkov Cc: David Rientjes Cc: Joe Perches Cc: Josh Triplett Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 28 +++++++++++++--------------- include/linux/compiler.h | 26 +++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index dc11dc762fc3..7f4818673c41 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -17,6 +17,7 @@ struct pt_regs; #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) #define BUILD_BUG_ON_INVALID(e) (0) +#define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) #define BUILD_BUG() (0) #else /* __CHECKER__ */ @@ -39,6 +40,15 @@ struct pt_regs; */ #define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) +/** + * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied + * error message. + * @condition: the condition which the compiler should know is false. + * + * See BUILD_BUG_ON for description. + */ +#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) + /** * BUILD_BUG_ON - break compile if a condition is true. * @condition: the condition which the compiler should know is false. @@ -60,15 +70,8 @@ struct pt_regs; #ifndef __OPTIMIZE__ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #else -#define BUILD_BUG_ON(condition) \ - do { \ - bool __cond = !!(condition); \ - extern void __build_bug_on_failed(void) \ - __compiletime_error("BUILD_BUG_ON failed"); \ - if (__cond) \ - __build_bug_on_failed(); \ - __compiletime_error_fallback(__cond); \ - } while (0) +#define BUILD_BUG_ON(condition) \ + BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) #endif /** @@ -78,12 +81,7 @@ struct pt_regs; * build time, you should use BUILD_BUG to detect if it is * unexpectedly used. */ -#define BUILD_BUG() \ - do { \ - extern void __build_bug_failed(void) \ - __compiletime_error("BUILD_BUG failed");\ - __build_bug_failed(); \ - } while (0) +#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") #endif /* __CHECKER__ */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 423bb6bd660f..10b8f23fab0f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -308,11 +308,35 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_error # define __compiletime_error(message) # define __compiletime_error_fallback(condition) \ - do { ((void)sizeof(char[1 - 2*!!(condition)])); } while (0) + do { ((void)sizeof(char[1 - 2 * condition])); } while (0) #else # define __compiletime_error_fallback(condition) do { } while (0) #endif +#define __compiletime_assert(condition, msg, prefix, suffix) \ + do { \ + bool __cond = !(condition); \ + extern void prefix ## suffix(void) __compiletime_error(msg); \ + if (__cond) \ + prefix ## suffix(); \ + __compiletime_error_fallback(__cond); \ + } while (0) + +#define _compiletime_assert(condition, msg, prefix, suffix) \ + __compiletime_assert(condition, msg, prefix, suffix) + +/** + * compiletime_assert - break build and emit msg if condition is false + * @condition: a compile-time constant condition to check + * @msg: a message to emit if condition is false + * + * In tradition of POSIX assert, this macro will break the build if the + * supplied condition is *false*, emitting the supplied error message if the + * compiler has support to do so. + */ +#define compiletime_assert(condition, msg) \ + _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From b1ae345d971664f70cfdc293029c40ccfb093591 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Feb 2013 16:42:47 -0800 Subject: lockdep: make lockdep_assert_held() not have a return value I recently made the mistake of writing: foo = lockdep_dereference_protected(..., lockdep_assert_held(...)); which is clearly bogus. If lockdep is disabled in the config this would cause a compile failure, if it is enabled then it compiles and causes a puzzling warning about dereferencing without the correct protection. Wrap the macro in "do { ... } while (0)" to also fail compile for this when lockdep is enabled. Signed-off-by: Johannes Berg Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2bca44b0893c..bfe88c4aa251 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -359,7 +359,9 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) -#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) +#define lockdep_assert_held(l) do { \ + WARN_ON(debug_locks && !lockdep_is_held(l)); \ + } while (0) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) -- cgit v1.2.3 From 7d311cdab663f4f7ab3a4c0d5d484234406f8268 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2013 16:42:48 -0800 Subject: bdi: allow block devices to say that they require stable page writes This patchset ("stable page writes, part 2") makes some key modifications to the original 'stable page writes' patchset. First, it provides creators (devices and filesystems) of a backing_dev_info a flag that declares whether or not it is necessary to ensure that page contents cannot change during writeout. It is no longer assumed that this is true of all devices (which was never true anyway). Second, the flag is used to relaxed the wait_on_page_writeback calls so that wait only occurs if the device needs it. Third, it fixes up the remaining disk-backed filesystems to use this improved conditional-wait logic to provide stable page writes on those filesystems. It is hoped that (for people not using checksumming devices, anyway) this patchset will give back unnecessary performance decreases since the original stable page write patchset went into 3.0. Sorry about not fixing it sooner. Complaints were registered by several people about the long write latencies introduced by the original stable page write patchset. Generally speaking, the kernel ought to allocate as little extra memory as possible to facilitate writeout, but for people who simply cannot wait, a second page stability strategy is (re)introduced: snapshotting page contents. The waiting behavior is still the default strategy; to enable page snapshotting, a superblock flag (MS_SNAP_STABLE) must be set. This flag is used to bandaid^Henable stable page writeback on ext3[1], and is not used anywhere else. Given that there are already a few storage devices and network FSes that have rolled their own page stability wait/page snapshot code, it would be nice to move towards consolidating all of these. It seems possible that iscsi and raid5 may wish to use the new stable page write support to enable zero-copy writeout. Thank you to Jan Kara for helping fix a couple more filesystems. Per Andrew Morton's request, here are the result of using dbench to measure latencies on ext2: 3.8.0-rc3: Operation Count AvgLat MaxLat ---------------------------------------- WriteX 109347 0.028 59.817 ReadX 347180 0.004 3.391 Flush 15514 29.828 287.283 Throughput 57.429 MB/sec 4 clients 4 procs max_latency=287.290 ms 3.8.0-rc3 + patches: WriteX 105556 0.029 4.273 ReadX 335004 0.005 4.112 Flush 14982 30.540 298.634 Throughput 55.4496 MB/sec 4 clients 4 procs max_latency=298.650 ms As you can see, for ext2 the maximum write latency decreases from ~60ms on a laptop hard disk to ~4ms. I'm not sure why the flush latencies increase, though I suspect that being able to dirty pages faster gives the flusher more work to do. On ext4, the average write latency decreases as well as all the maximum latencies: 3.8.0-rc3: WriteX 85624 0.152 33.078 ReadX 272090 0.010 61.210 Flush 12129 36.219 168.260 Throughput 44.8618 MB/sec 4 clients 4 procs max_latency=168.276 ms 3.8.0-rc3 + patches: WriteX 86082 0.141 30.928 ReadX 273358 0.010 36.124 Flush 12214 34.800 165.689 Throughput 44.9941 MB/sec 4 clients 4 procs max_latency=165.722 ms XFS seems to exhibit similar latency improvements as ext2: 3.8.0-rc3: WriteX 125739 0.028 104.343 ReadX 399070 0.005 4.115 Flush 17851 25.004 131.390 Throughput 66.0024 MB/sec 4 clients 4 procs max_latency=131.406 ms 3.8.0-rc3 + patches: WriteX 123529 0.028 6.299 ReadX 392434 0.005 4.287 Flush 17549 25.120 188.687 Throughput 64.9113 MB/sec 4 clients 4 procs max_latency=188.704 ms ...and btrfs, just to round things out, also shows some latency decreases: 3.8.0-rc3: WriteX 67122 0.083 82.355 ReadX 212719 0.005 2.828 Flush 9547 47.561 147.418 Throughput 35.3391 MB/sec 4 clients 4 procs max_latency=147.433 ms 3.8.0-rc3 + patches: WriteX 64898 0.101 71.631 ReadX 206673 0.005 7.123 Flush 9190 47.963 219.034 Throughput 34.0795 MB/sec 4 clients 4 procs max_latency=219.044 ms Before this patchset, all filesystems would block, regardless of whether or not it was necessary. ext3 would wait, but still generate occasional checksum errors. The network filesystems were left to do their own thing, so they'd wait too. After this patchset, all the disk filesystems except ext3 and btrfs will wait only if the hardware requires it. ext3 (if necessary) snapshots pages instead of blocking, and btrfs provides its own bdi so the mm will never wait. Network filesystems haven't been touched, so either they provide their own wait code, or they don't block at all. The blocking behavior is back to what it was before 3.0 if you don't have a disk requiring stable page writes. This patchset has been tested on 3.8.0-rc3 on x64 with ext3, ext4, and xfs. I've spot-checked 3.8.0-rc4 and seem to be getting the same results as -rc3. [1] The alternative fixes to ext3 include fixing the locking order and page bit handling like we did for ext4 (but then why not just use ext4?), or setting PG_writeback so early that ext3 becomes extremely slow. I tried that, but the number of write()s I could initiate dropped by nearly an order of magnitude. That was a bit much even for the author of the stable page series! :) This patch: Creates a per-backing-device flag that tracks whether or not pages must be held immutable during writeout. Eventually it will be used to waive wait_for_page_writeback() if nothing requires stable pages. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Artem Bityutskiy Cc: Joel Becker Cc: Mark Fasheh Cc: Steven Whitehouse Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 5 +++++ block/blk-integrity.c | 4 ++++ include/linux/backing-dev.h | 6 ++++++ mm/backing-dev.c | 11 +++++++++++ 4 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5f500977b42f..d773d5697cf5 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,3 +48,8 @@ max_ratio (read-write) most of the write-back cache. For example in case of an NFS mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. + +stable_pages_required (read-only) + + If set, the backing device requires that all pages comprising a write + request must not be changed until writeout is complete. diff --git a/block/blk-integrity.c b/block/blk-integrity.c index da2a818c3a92..dabd221857e1 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -420,6 +420,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) } else bi->name = bi_unsupported_name; + disk->queue->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + return 0; } EXPORT_SYMBOL(blk_integrity_register); @@ -438,6 +440,8 @@ void blk_integrity_unregister(struct gendisk *disk) if (!disk || !disk->integrity) return; + disk->queue->backing_dev_info.capabilities &= ~BDI_CAP_STABLE_WRITES; + bi = disk->integrity; kobject_uevent(&bi->kobj, KOBJ_REMOVE); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 12731a19ef06..350459910fe1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -254,6 +254,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_EXEC_MAP 0x00000040 #define BDI_CAP_NO_ACCT_WB 0x00000080 #define BDI_CAP_SWAP_BACKED 0x00000100 +#define BDI_CAP_STABLE_WRITES 0x00000200 #define BDI_CAP_VMFLAGS \ (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) @@ -308,6 +309,11 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout); int pdflush_proc_obsolete(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) +{ + return bdi->capabilities & BDI_CAP_STABLE_WRITES; +} + static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca2b3ee176..41733c5dc820 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -221,12 +221,23 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) +static ssize_t stable_pages_required_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return snprintf(page, PAGE_SIZE-1, "%d\n", + bdi_cap_stable_pages_required(bdi) ? 1 : 0); +} + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), + __ATTR_RO(stable_pages_required), __ATTR_NULL, }; -- cgit v1.2.3 From 1d1d1a767206fbe5d4c69493b7e6d2a8d08cc0a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2013 16:42:51 -0800 Subject: mm: only enforce stable page writes if the backing device requires it Create a helper function to check if a backing device requires stable page writes and, if so, performs the necessary wait. Then, make it so that all points in the memory manager that handle making pages writable use the helper function. This should provide stable page write support to most filesystems, while eliminating unnecessary waiting for devices that don't require the feature. Before this patchset, all filesystems would block, regardless of whether or not it was necessary. ext3 would wait, but still generate occasional checksum errors. The network filesystems were left to do their own thing, so they'd wait too. After this patchset, all the disk filesystems except ext3 and btrfs will wait only if the hardware requires it. ext3 (if necessary) snapshots pages instead of blocking, and btrfs provides its own bdi so the mm will never wait. Network filesystems haven't been touched, so either they provide their own stable page guarantees or they don't block at all. The blocking behavior is back to what it was before 3.0 if you don't have a disk requiring stable page writes. Here's the result of using dbench to test latency on ext2: 3.8.0-rc3: Operation Count AvgLat MaxLat ---------------------------------------- WriteX 109347 0.028 59.817 ReadX 347180 0.004 3.391 Flush 15514 29.828 287.283 Throughput 57.429 MB/sec 4 clients 4 procs max_latency=287.290 ms 3.8.0-rc3 + patches: WriteX 105556 0.029 4.273 ReadX 335004 0.005 4.112 Flush 14982 30.540 298.634 Throughput 55.4496 MB/sec 4 clients 4 procs max_latency=298.650 ms As you can see, the maximum write latency drops considerably with this patch enabled. The other filesystems (ext3/ext4/xfs/btrfs) behave similarly, but see the cover letter for those results. Signed-off-by: Darrick J. Wong Acked-by: Steven Whitehouse Reviewed-by: Jan Kara Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Artem Bityutskiy Cc: Joel Becker Cc: Mark Fasheh Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- fs/ext4/inode.c | 2 +- fs/gfs2/file.c | 2 +- fs/nilfs2/file.c | 2 +- include/linux/pagemap.h | 1 + mm/filemap.c | 3 ++- mm/page-writeback.c | 20 ++++++++++++++++++++ 7 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 7a75c3e0fd58..2ea9cd44aeae 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (unlikely(ret < 0)) goto out_unlock; set_page_dirty(page); - wait_on_page_writeback(page); + wait_for_stable_page(page); return 0; out_unlock: unlock_page(page); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbfe13bf5b2a..cd818d8bb221 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 0, len, NULL, ext4_bh_unmapped)) { /* Wait so that we don't change page under IO */ - wait_on_page_writeback(page); + wait_for_stable_page(page); ret = VM_FAULT_LOCKED; goto out; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 06b7092a3f25..2687f50d98cb 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -483,7 +483,7 @@ out: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); - wait_on_page_writeback(page); + wait_for_stable_page(page); } sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 61946883025c..bec4af6eab13 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_on_page_writeback(page); + wait_for_stable_page(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6da609d14c15..0e38e13eb249 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -414,6 +414,7 @@ static inline void wait_on_page_writeback(struct page *page) } extern void end_page_writeback(struct page *page); +void wait_for_stable_page(struct page *page); /* * Add an arbitrary waiter to a page's wait queue diff --git a/mm/filemap.c b/mm/filemap.c index 24a7ea583f0c..c610076c30e1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) * see the dirty page and writeprotect it again. */ set_page_dirty(page); + wait_for_stable_page(page); out: sb_end_pagefault(inode->i_sb); return ret; @@ -2274,7 +2275,7 @@ repeat: return NULL; } found: - wait_on_page_writeback(page); + wait_for_stable_page(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 66a0024becd9..355d5ee69058 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2290,3 +2290,23 @@ int mapping_tagged(struct address_space *mapping, int tag) return radix_tree_tagged(&mapping->page_tree, tag); } EXPORT_SYMBOL(mapping_tagged); + +/** + * wait_for_stable_page() - wait for writeback to finish, if necessary. + * @page: The page to wait on. + * + * This function determines if the given page is related to a backing device + * that requires page contents to be held stable during writeback. If so, then + * it will wait for any pending writeback to complete. + */ +void wait_for_stable_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + struct backing_dev_info *bdi = mapping->backing_dev_info; + + if (!bdi_cap_stable_pages_required(bdi)) + return; + + wait_on_page_writeback(page); +} +EXPORT_SYMBOL_GPL(wait_for_stable_page); -- cgit v1.2.3 From 9a46ad6d6df3b547d057c39db13f69d7170a99e9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 21 Feb 2013 16:43:03 -0800 Subject: smp: make smp_call_function_many() use logic similar to smp_call_function_single() I'm testing swapout workload in a two-socket Xeon machine. The workload has 10 threads, each thread sequentially accesses separate memory region. TLB flush overhead is very big in the workload. For each page, page reclaim need move it from active lru list and then unmap it. Both need a TLB flush. And this is a multthread workload, TLB flush happens in 10 CPUs. In X86, TLB flush uses generic smp_call)function. So this workload stress smp_call_function_many heavily. Without patch, perf shows: + 24.49% [k] generic_smp_call_function_interrupt - 21.72% [k] _raw_spin_lock - _raw_spin_lock + 79.80% __page_check_address + 6.42% generic_smp_call_function_interrupt + 3.31% get_swap_page + 2.37% free_pcppages_bulk + 1.75% handle_pte_fault + 1.54% put_super + 1.41% grab_super_passive + 1.36% __swap_duplicate + 0.68% blk_flush_plug_list + 0.62% swap_info_get + 6.55% [k] flush_tlb_func + 6.46% [k] smp_call_function_many + 5.09% [k] call_function_interrupt + 4.75% [k] default_send_IPI_mask_sequence_phys + 2.18% [k] find_next_bit swapout throughput is around 1300M/s. With the patch, perf shows: - 27.23% [k] _raw_spin_lock - _raw_spin_lock + 80.53% __page_check_address + 8.39% generic_smp_call_function_single_interrupt + 2.44% get_swap_page + 1.76% free_pcppages_bulk + 1.40% handle_pte_fault + 1.15% __swap_duplicate + 1.05% put_super + 0.98% grab_super_passive + 0.86% blk_flush_plug_list + 0.57% swap_info_get + 8.25% [k] default_send_IPI_mask_sequence_phys + 7.55% [k] call_function_interrupt + 7.47% [k] smp_call_function_many + 7.25% [k] flush_tlb_func + 3.81% [k] _raw_spin_lock_irqsave + 3.78% [k] generic_smp_call_function_single_interrupt swapout throughput is around 1400M/s. So there is around a 7% improvement, and total cpu utilization doesn't change. Without the patch, cfd_data is shared by all CPUs. generic_smp_call_function_interrupt does read/write cfd_data several times which will create a lot of cache ping-pong. With the patch, the data becomes per-cpu. The ping-pong is avoided. And from the perf data, this doesn't make call_single_queue lock contend. Next step is to remove generic_smp_call_function_interrupt() from arch code. Signed-off-by: Shaohua Li Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Cc: Jens Axboe Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 3 +- kernel/smp.c | 183 +++++++++------------------------------------------- 2 files changed, 32 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index dd6f06be3c9f..3e07a7df6478 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -89,7 +89,8 @@ void kick_all_cpus_sync(void); #ifdef CONFIG_USE_GENERIC_SMP_HELPERS void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); -void generic_smp_call_function_interrupt(void); +#define generic_smp_call_function_interrupt \ + generic_smp_call_function_single_interrupt #else static inline void call_function_init(void) { } #endif diff --git a/kernel/smp.c b/kernel/smp.c index 69f38bd98b42..8e451f3ff51b 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -16,22 +16,12 @@ #include "smpboot.h" #ifdef CONFIG_USE_GENERIC_SMP_HELPERS -static struct { - struct list_head queue; - raw_spinlock_t lock; -} call_function __cacheline_aligned_in_smp = - { - .queue = LIST_HEAD_INIT(call_function.queue), - .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock), - }; - enum { CSD_FLAG_LOCK = 0x01, }; struct call_function_data { - struct call_single_data csd; - atomic_t refs; + struct call_single_data __percpu *csd; cpumask_var_t cpumask; cpumask_var_t cpumask_ipi; }; @@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); + cfd->csd = alloc_percpu(struct call_single_data); + if (!cfd->csd) { + free_cpumask_var(cfd->cpumask); + return notifier_from_errno(-ENOMEM); + } break; #ifdef CONFIG_HOTPLUG_CPU @@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD_FROZEN: free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); + free_percpu(cfd->csd); break; #endif }; @@ -170,85 +166,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) csd_lock_wait(data); } -/* - * Invoked by arch to handle an IPI for call function. Must be called with - * interrupts disabled. - */ -void generic_smp_call_function_interrupt(void) -{ - struct call_function_data *data; - int cpu = smp_processor_id(); - - /* - * Shouldn't receive this interrupt on a cpu that is not yet online. - */ - WARN_ON_ONCE(!cpu_online(cpu)); - - /* - * Ensure entry is visible on call_function_queue after we have - * entered the IPI. See comment in smp_call_function_many. - * If we don't have this, then we may miss an entry on the list - * and never get another IPI to process it. - */ - smp_mb(); - - /* - * It's ok to use list_for_each_rcu() here even though we may - * delete 'pos', since list_del_rcu() doesn't clear ->next - */ - list_for_each_entry_rcu(data, &call_function.queue, csd.list) { - int refs; - smp_call_func_t func; - - /* - * Since we walk the list without any locks, we might - * see an entry that was completed, removed from the - * list and is in the process of being reused. - * - * We must check that the cpu is in the cpumask before - * checking the refs, and both must be set before - * executing the callback on this cpu. - */ - - if (!cpumask_test_cpu(cpu, data->cpumask)) - continue; - - smp_rmb(); - - if (atomic_read(&data->refs) == 0) - continue; - - func = data->csd.func; /* save for later warn */ - func(data->csd.info); - - /* - * If the cpu mask is not still set then func enabled - * interrupts (BUG), and this cpu took another smp call - * function interrupt and executed func(info) twice - * on this cpu. That nested execution decremented refs. - */ - if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { - WARN(1, "%pf enabled interrupts and double executed\n", func); - continue; - } - - refs = atomic_dec_return(&data->refs); - WARN_ON(refs < 0); - - if (refs) - continue; - - WARN_ON(!cpumask_empty(data->cpumask)); - - raw_spin_lock(&call_function.lock); - list_del_rcu(&data->csd.list); - raw_spin_unlock(&call_function.lock); - - csd_unlock(&data->csd); - } - -} - /* * Invoked by arch to handle an IPI for call function single. Must be * called from the arch with interrupts disabled. @@ -453,8 +370,7 @@ void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { struct call_function_data *data; - unsigned long flags; - int refs, cpu, next_cpu, this_cpu = smp_processor_id(); + int cpu, next_cpu, this_cpu = smp_processor_id(); /* * Can deadlock when called with interrupts disabled. @@ -486,50 +402,13 @@ void smp_call_function_many(const struct cpumask *mask, } data = &__get_cpu_var(cfd_data); - csd_lock(&data->csd); - - /* This BUG_ON verifies our reuse assertions and can be removed */ - BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); - - /* - * The global call function queue list add and delete are protected - * by a lock, but the list is traversed without any lock, relying - * on the rcu list add and delete to allow safe concurrent traversal. - * We reuse the call function data without waiting for any grace - * period after some other cpu removes it from the global queue. - * This means a cpu might find our data block as it is being - * filled out. - * - * We hold off the interrupt handler on the other cpu by - * ordering our writes to the cpu mask vs our setting of the - * refs counter. We assert only the cpu owning the data block - * will set a bit in cpumask, and each bit will only be cleared - * by the subject cpu. Each cpu must first find its bit is - * set and then check that refs is set indicating the element is - * ready to be processed, otherwise it must skip the entry. - * - * On the previous iteration refs was set to 0 by another cpu. - * To avoid the use of transitivity, set the counter to 0 here - * so the wmb will pair with the rmb in the interrupt handler. - */ - atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */ - - data->csd.func = func; - data->csd.info = info; - /* Ensure 0 refs is visible before mask. Also orders func and info */ - smp_wmb(); - - /* We rely on the "and" being processed before the store */ cpumask_and(data->cpumask, mask, cpu_online_mask); cpumask_clear_cpu(this_cpu, data->cpumask); - refs = cpumask_weight(data->cpumask); /* Some callers race with other cpus changing the passed mask */ - if (unlikely(!refs)) { - csd_unlock(&data->csd); + if (unlikely(!cpumask_weight(data->cpumask))) return; - } /* * After we put an entry into the list, data->cpumask @@ -537,34 +416,32 @@ void smp_call_function_many(const struct cpumask *mask, * a SMP function call, so data->cpumask will be zero. */ cpumask_copy(data->cpumask_ipi, data->cpumask); - raw_spin_lock_irqsave(&call_function.lock, flags); - /* - * Place entry at the _HEAD_ of the list, so that any cpu still - * observing the entry in generic_smp_call_function_interrupt() - * will not miss any other list entries: - */ - list_add_rcu(&data->csd.list, &call_function.queue); - /* - * We rely on the wmb() in list_add_rcu to complete our writes - * to the cpumask before this write to refs, which indicates - * data is on the list and is ready to be processed. - */ - atomic_set(&data->refs, refs); - raw_spin_unlock_irqrestore(&call_function.lock, flags); - /* - * Make the list addition visible before sending the ipi. - * (IPIs must obey or appear to obey normal Linux cache - * coherency rules -- see comment in generic_exec_single). - */ - smp_mb(); + for_each_cpu(cpu, data->cpumask) { + struct call_single_data *csd = per_cpu_ptr(data->csd, cpu); + struct call_single_queue *dst = + &per_cpu(call_single_queue, cpu); + unsigned long flags; + + csd_lock(csd); + csd->func = func; + csd->info = info; + + raw_spin_lock_irqsave(&dst->lock, flags); + list_add_tail(&csd->list, &dst->list); + raw_spin_unlock_irqrestore(&dst->lock, flags); + } /* Send a message to all CPUs in the map */ arch_send_call_function_ipi_mask(data->cpumask_ipi); - /* Optionally wait for the CPUs to complete */ - if (wait) - csd_lock_wait(&data->csd); + if (wait) { + for_each_cpu(cpu, data->cpumask) { + struct call_single_data *csd = + per_cpu_ptr(data->csd, cpu); + csd_lock_wait(csd); + } + } } EXPORT_SYMBOL(smp_call_function_many); -- cgit v1.2.3 From 36d308d8b547ee19d3fa7399858e5a1632413d0e Mon Sep 17 00:00:00 2001 From: Mikhail Gruzdev Date: Thu, 21 Feb 2013 16:43:10 -0800 Subject: printk: add pr_devel_once and pr_devel_ratelimited Standardize pr_devel logging macros family by adding pr_devel_once and pr_devel_ratelimited. Signed-off-by: Mikhail Gruzdev Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 86c4b6294713..54419f4f82c2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -252,6 +252,15 @@ extern void dump_stack(void) __cold; printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont_once(fmt, ...) \ printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) + +#if defined(DEBUG) +#define pr_devel_once(fmt, ...) \ + printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_once(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ @@ -295,6 +304,15 @@ extern void dump_stack(void) __cold; #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ + +#if defined(DEBUG) +#define pr_devel_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_ratelimited(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ -- cgit v1.2.3 From 26e8ccc223ebfd2047a96074f142544dc7062cfe Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Thu, 21 Feb 2013 16:44:06 -0800 Subject: backlight: lp855x_bl: support new LP8557 device LP8557 is one of LP855x family device, but it has different register map and initialization process. To support this device, device specific configuration is done through the lp855x_device_config structure. Few register definitions are fixed for better readability. BRIGHTNESS_CTRL -> LP855X_BRIGHTNESS_CTRL DEVICE_CTRL -> LP855X_DEVICE_CTRL EEPROM_START -> LP855X_EEPROM_START EEPROM_END -> LP855X_EEPROM_END EPROM_START -> LP8556_EPROM_START EPROM_END -> LP8556_EPROM_END And LP8557 register definitions are added. New register function, lp855x_update_bit() is added. Signed-off-by: Milo(Woogyom) Kim Acked-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/backlight/lp855x-driver.txt | 4 +- drivers/video/backlight/Kconfig | 2 +- drivers/video/backlight/lp855x_bl.c | 87 +++++++++++++++++++++++++------ include/linux/platform_data/lp855x.h | 19 +++++++ 4 files changed, 94 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt index 1529394cfe8b..18b06ca038ea 100644 --- a/Documentation/backlight/lp855x-driver.txt +++ b/Documentation/backlight/lp855x-driver.txt @@ -4,7 +4,7 @@ Kernel driver lp855x Backlight driver for LP855x ICs Supported chips: - Texas Instruments LP8550, LP8551, LP8552, LP8553 and LP8556 + Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 Author: Milo(Woogyom) Kim @@ -24,7 +24,7 @@ Value : pwm based or register based 2) chip_id The lp855x chip id. -Value : lp8550/lp8551/lp8552/lp8553/lp8556 +Value : lp8550/lp8551/lp8552/lp8553/lp8556/lp8557 Platform data for lp855x ------------------------ diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index a942a2488480..be27b551473f 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -381,7 +381,7 @@ config BACKLIGHT_LP855X tristate "Backlight driver for TI LP855X" depends on BACKLIGHT_CLASS_DEVICE && I2C help - This supports TI LP8550, LP8551, LP8552, LP8553 and LP8556 + This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 backlight driver. config BACKLIGHT_OT200 diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index 050cfbb53667..edd2041b1527 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -17,13 +17,23 @@ #include #include -/* Registers */ -#define BRIGHTNESS_CTRL 0x00 -#define DEVICE_CTRL 0x01 -#define EEPROM_START 0xA0 -#define EEPROM_END 0xA7 -#define EPROM_START 0xA0 -#define EPROM_END 0xAF +/* LP8550/1/2/3/6 Registers */ +#define LP855X_BRIGHTNESS_CTRL 0x00 +#define LP855X_DEVICE_CTRL 0x01 +#define LP855X_EEPROM_START 0xA0 +#define LP855X_EEPROM_END 0xA7 +#define LP8556_EPROM_START 0xA0 +#define LP8556_EPROM_END 0xAF + +/* LP8557 Registers */ +#define LP8557_BL_CMD 0x00 +#define LP8557_BL_MASK 0x01 +#define LP8557_BL_ON 0x01 +#define LP8557_BL_OFF 0x00 +#define LP8557_BRIGHTNESS_CTRL 0x04 +#define LP8557_CONFIG 0x10 +#define LP8557_EPROM_START 0x10 +#define LP8557_EPROM_END 0x1E #define BUF_SIZE 20 #define DEFAULT_BL_NAME "lcd-backlight" @@ -75,6 +85,24 @@ static int lp855x_write_byte(struct lp855x *lp, u8 reg, u8 data) return i2c_smbus_write_byte_data(lp->client, reg, data); } +static int lp855x_update_bit(struct lp855x *lp, u8 reg, u8 mask, u8 data) +{ + int ret; + u8 tmp; + + ret = i2c_smbus_read_byte_data(lp->client, reg); + if (ret < 0) { + dev_err(lp->dev, "failed to read 0x%.2x\n", reg); + return ret; + } + + tmp = (u8)ret; + tmp &= ~mask; + tmp |= data & mask; + + return lp855x_write_byte(lp, reg, tmp); +} + static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) { u8 start, end; @@ -84,12 +112,16 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) case LP8551: case LP8552: case LP8553: - start = EEPROM_START; - end = EEPROM_END; + start = LP855X_EEPROM_START; + end = LP855X_EEPROM_END; break; case LP8556: - start = EPROM_START; - end = EPROM_END; + start = LP8556_EPROM_START; + end = LP8556_EPROM_END; + break; + case LP8557: + start = LP8557_EPROM_START; + end = LP8557_EPROM_END; break; default: return false; @@ -98,9 +130,30 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) return (addr >= start && addr <= end); } +static int lp8557_bl_off(struct lp855x *lp) +{ + /* BL_ON = 0 before updating EPROM settings */ + return lp855x_update_bit(lp, LP8557_BL_CMD, LP8557_BL_MASK, + LP8557_BL_OFF); +} + +static int lp8557_bl_on(struct lp855x *lp) +{ + /* BL_ON = 1 after updating EPROM settings */ + return lp855x_update_bit(lp, LP8557_BL_CMD, LP8557_BL_MASK, + LP8557_BL_ON); +} + static struct lp855x_device_config lp855x_dev_cfg = { - .reg_brightness = BRIGHTNESS_CTRL, - .reg_devicectrl = DEVICE_CTRL, + .reg_brightness = LP855X_BRIGHTNESS_CTRL, + .reg_devicectrl = LP855X_DEVICE_CTRL, +}; + +static struct lp855x_device_config lp8557_dev_cfg = { + .reg_brightness = LP8557_BRIGHTNESS_CTRL, + .reg_devicectrl = LP8557_CONFIG, + .pre_init_device = lp8557_bl_off, + .post_init_device = lp8557_bl_on, }; /* @@ -123,6 +176,9 @@ static int lp855x_configure(struct lp855x *lp) case LP8550 ... LP8556: lp->cfg = &lp855x_dev_cfg; break; + case LP8557: + lp->cfg = &lp8557_dev_cfg; + break; default: return -EINVAL; } @@ -210,7 +266,7 @@ static int lp855x_bl_update_status(struct backlight_device *bl) } else if (mode == REGISTER_BASED) { u8 val = bl->props.brightness; - lp855x_write_byte(lp, BRIGHTNESS_CTRL, val); + lp855x_write_byte(lp, lp->cfg->reg_brightness, val); } return 0; @@ -224,7 +280,7 @@ static int lp855x_bl_get_brightness(struct backlight_device *bl) if (mode == REGISTER_BASED) { u8 val = 0; - lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val); + lp855x_read_byte(lp, lp->cfg->reg_brightness, &val); bl->props.brightness = val; } @@ -376,6 +432,7 @@ static const struct i2c_device_id lp855x_ids[] = { {"lp8552", LP8552}, {"lp8553", LP8553}, {"lp8556", LP8556}, + {"lp8557", LP8557}, { } }; MODULE_DEVICE_TABLE(i2c, lp855x_ids); diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h index e81f62d24ee2..20ee8b221dbd 100644 --- a/include/linux/platform_data/lp855x.h +++ b/include/linux/platform_data/lp855x.h @@ -49,12 +49,24 @@ #define LP8556_FAST_CONFIG BIT(7) /* use it if EPROMs should be maintained when exiting the low power mode */ +/* CONFIG register - LP8557 */ +#define LP8557_PWM_STANDBY BIT(7) +#define LP8557_PWM_FILTER BIT(6) +#define LP8557_RELOAD_EPROM BIT(3) /* use it if EPROMs should be reset + when the backlight turns on */ +#define LP8557_OFF_OPENLEDS BIT(2) +#define LP8557_PWM_CONFIG LP8557_PWM_ONLY +#define LP8557_I2C_CONFIG LP8557_I2C_ONLY +#define LP8557_COMB1_CONFIG LP8557_COMBINED1 +#define LP8557_COMB2_CONFIG LP8557_COMBINED2 + enum lp855x_chip_id { LP8550, LP8551, LP8552, LP8553, LP8556, + LP8557, }; enum lp855x_brightness_ctrl_mode { @@ -89,6 +101,13 @@ enum lp8556_brightness_source { LP8556_COMBINED2, /* pwm + i2c after the shaper block */ }; +enum lp8557_brightness_source { + LP8557_PWM_ONLY, + LP8557_I2C_ONLY, + LP8557_COMBINED1, /* pwm + i2c after the shaper block */ + LP8557_COMBINED2, /* pwm + i2c before the shaper block */ +}; + struct lp855x_rom_data { u8 addr; u8 val; -- cgit v1.2.3 From f142d3bd556c5e82e9bb3d33d07d6708702ea4ce Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 21 Nov 2012 15:29:29 +0100 Subject: video: Add generic HDMI infoframe helpers Add generic helpers to pack HDMI infoframes into binary buffers. Signed-off-by: Thierry Reding Reviewed-by: Alex Deucher --- drivers/video/Kconfig | 3 + drivers/video/Makefile | 1 + drivers/video/hdmi.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/hdmi.h | 231 +++++++++++++++++++++++++++++++++++++ 4 files changed, 543 insertions(+) create mode 100644 drivers/video/hdmi.c create mode 100644 include/linux/hdmi.h (limited to 'include/linux') diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 09f1a18c1adf..b11eeab94151 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -52,6 +52,9 @@ config OF_VIDEOMODE help helper to get videomodes from the devicetree +config HDMI + bool + menuconfig FB tristate "Support for frame buffer devices" ---help--- diff --git a/drivers/video/Makefile b/drivers/video/Makefile index f592f3b32ec7..0b50082635e3 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -5,6 +5,7 @@ # Each configuration option enables a list of files. obj-$(CONFIG_VGASTATE) += vgastate.o +obj-$(CONFIG_HDMI) += hdmi.o obj-y += fb_notify.o obj-$(CONFIG_FB) += fb.o fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \ diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c new file mode 100644 index 000000000000..ab23c9b79143 --- /dev/null +++ b/drivers/video/hdmi.c @@ -0,0 +1,308 @@ +/* + * Copyright (C) 2012 Avionic Design GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +static void hdmi_infoframe_checksum(void *buffer, size_t size) +{ + u8 *ptr = buffer; + u8 csum = 0; + size_t i; + + /* compute checksum */ + for (i = 0; i < size; i++) + csum += ptr[i]; + + ptr[3] = 256 - csum; +} + +/** + * hdmi_avi_infoframe_init() - initialize an HDMI AVI infoframe + * @frame: HDMI AVI infoframe + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame) +{ + memset(frame, 0, sizeof(*frame)); + + frame->type = HDMI_INFOFRAME_TYPE_AVI; + frame->version = 2; + frame->length = 13; + + return 0; +} +EXPORT_SYMBOL(hdmi_avi_infoframe_init); + +/** + * hdmi_avi_infoframe_pack() - write HDMI AVI infoframe to binary buffer + * @frame: HDMI AVI infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Packs the information contained in the @frame structure into a binary + * representation that can be written into the corresponding controller + * registers. Also computes the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, + size_t size) +{ + u8 *ptr = buffer; + size_t length; + + length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; + + if (size < length) + return -ENOSPC; + + memset(buffer, 0, length); + + ptr[0] = frame->type; + ptr[1] = frame->version; + ptr[2] = frame->length; + ptr[3] = 0; /* checksum */ + + /* start infoframe payload */ + ptr += HDMI_INFOFRAME_HEADER_SIZE; + + ptr[0] = ((frame->colorspace & 0x3) << 5) | (frame->scan_mode & 0x3); + + if (frame->active_info_valid) + ptr[0] |= BIT(4); + + if (frame->horizontal_bar_valid) + ptr[0] |= BIT(3); + + if (frame->vertical_bar_valid) + ptr[0] |= BIT(2); + + ptr[1] = ((frame->colorimetry & 0x3) << 6) | + ((frame->picture_aspect & 0x3) << 4) | + (frame->active_aspect & 0xf); + + ptr[2] = ((frame->extended_colorimetry & 0x7) << 4) | + ((frame->quantization_range & 0x3) << 2) | + (frame->nups & 0x3); + + if (frame->itc) + ptr[2] |= BIT(7); + + ptr[3] = frame->video_code & 0x7f; + + ptr[4] = ((frame->ycc_quantization_range & 0x3) << 6) | + ((frame->content_type & 0x3) << 4) | + (frame->pixel_repeat & 0xf); + + ptr[5] = frame->top_bar & 0xff; + ptr[6] = (frame->top_bar >> 8) & 0xff; + ptr[7] = frame->bottom_bar & 0xff; + ptr[8] = (frame->bottom_bar >> 8) & 0xff; + ptr[9] = frame->left_bar & 0xff; + ptr[10] = (frame->left_bar >> 8) & 0xff; + ptr[11] = frame->right_bar & 0xff; + ptr[12] = (frame->right_bar >> 8) & 0xff; + + hdmi_infoframe_checksum(buffer, length); + + return length; +} +EXPORT_SYMBOL(hdmi_avi_infoframe_pack); + +/** + * hdmi_spd_infoframe_init() - initialize an HDMI SPD infoframe + * @frame: HDMI SPD infoframe + * @vendor: vendor string + * @product: product string + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame, + const char *vendor, const char *product) +{ + memset(frame, 0, sizeof(*frame)); + + frame->type = HDMI_INFOFRAME_TYPE_SPD; + frame->version = 1; + frame->length = 25; + + strncpy(frame->vendor, vendor, sizeof(frame->vendor)); + strncpy(frame->product, product, sizeof(frame->product)); + + return 0; +} +EXPORT_SYMBOL(hdmi_spd_infoframe_init); + +/** + * hdmi_spd_infoframe_pack() - write HDMI SPD infoframe to binary buffer + * @frame: HDMI SPD infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Packs the information contained in the @frame structure into a binary + * representation that can be written into the corresponding controller + * registers. Also computes the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer, + size_t size) +{ + u8 *ptr = buffer; + size_t length; + + length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; + + if (size < length) + return -ENOSPC; + + memset(buffer, 0, length); + + ptr[0] = frame->type; + ptr[1] = frame->version; + ptr[2] = frame->length; + ptr[3] = 0; /* checksum */ + + /* start infoframe payload */ + ptr += HDMI_INFOFRAME_HEADER_SIZE; + + memcpy(ptr, frame->vendor, sizeof(frame->vendor)); + memcpy(ptr + 8, frame->product, sizeof(frame->product)); + + ptr[24] = frame->sdi; + + hdmi_infoframe_checksum(buffer, length); + + return length; +} +EXPORT_SYMBOL(hdmi_spd_infoframe_pack); + +/** + * hdmi_audio_infoframe_init() - initialize an HDMI audio infoframe + * @frame: HDMI audio infoframe + * + * Returns 0 on success or a negative error code on failure. + */ +int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame) +{ + memset(frame, 0, sizeof(*frame)); + + frame->type = HDMI_INFOFRAME_TYPE_AUDIO; + frame->version = 1; + frame->length = 10; + + return 0; +} +EXPORT_SYMBOL(hdmi_audio_infoframe_init); + +/** + * hdmi_audio_infoframe_pack() - write HDMI audio infoframe to binary buffer + * @frame: HDMI audio infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Packs the information contained in the @frame structure into a binary + * representation that can be written into the corresponding controller + * registers. Also computes the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, + void *buffer, size_t size) +{ + unsigned char channels; + u8 *ptr = buffer; + size_t length; + + length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; + + if (size < length) + return -ENOSPC; + + memset(buffer, 0, length); + + if (frame->channels >= 2) + channels = frame->channels - 1; + else + channels = 0; + + ptr[0] = frame->type; + ptr[1] = frame->version; + ptr[2] = frame->length; + ptr[3] = 0; /* checksum */ + + /* start infoframe payload */ + ptr += HDMI_INFOFRAME_HEADER_SIZE; + + ptr[0] = ((frame->coding_type & 0xf) << 4) | (channels & 0x7); + ptr[1] = ((frame->sample_frequency & 0x7) << 2) | + (frame->sample_size & 0x3); + ptr[2] = frame->coding_type_ext & 0x1f; + ptr[3] = frame->channel_allocation; + ptr[4] = (frame->level_shift_value & 0xf) << 3; + + if (frame->downmix_inhibit) + ptr[4] |= BIT(7); + + hdmi_infoframe_checksum(buffer, length); + + return length; +} +EXPORT_SYMBOL(hdmi_audio_infoframe_pack); + +/** + * hdmi_vendor_infoframe_pack() - write a HDMI vendor infoframe to binary + * buffer + * @frame: HDMI vendor infoframe + * @buffer: destination buffer + * @size: size of buffer + * + * Packs the information contained in the @frame structure into a binary + * representation that can be written into the corresponding controller + * registers. Also computes the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns the number of bytes packed into the binary buffer or a negative + * error code on failure. + */ +ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, + void *buffer, size_t size) +{ + u8 *ptr = buffer; + size_t length; + + length = HDMI_INFOFRAME_HEADER_SIZE + frame->length; + + if (size < length) + return -ENOSPC; + + memset(buffer, 0, length); + + ptr[0] = frame->type; + ptr[1] = frame->version; + ptr[2] = frame->length; + ptr[3] = 0; /* checksum */ + + memcpy(&ptr[HDMI_INFOFRAME_HEADER_SIZE], frame->data, frame->length); + + hdmi_infoframe_checksum(buffer, length); + + return length; +} +EXPORT_SYMBOL(hdmi_vendor_infoframe_pack); diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h new file mode 100644 index 000000000000..3b589440ecfe --- /dev/null +++ b/include/linux/hdmi.h @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2012 Avionic Design GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_HDMI_H_ +#define __LINUX_HDMI_H_ + +#include + +enum hdmi_infoframe_type { + HDMI_INFOFRAME_TYPE_VENDOR = 0x81, + HDMI_INFOFRAME_TYPE_AVI = 0x82, + HDMI_INFOFRAME_TYPE_SPD = 0x83, + HDMI_INFOFRAME_TYPE_AUDIO = 0x84, +}; + +#define HDMI_INFOFRAME_HEADER_SIZE 4 +#define HDMI_AVI_INFOFRAME_SIZE 13 +#define HDMI_SPD_INFOFRAME_SIZE 25 +#define HDMI_AUDIO_INFOFRAME_SIZE 10 + +enum hdmi_colorspace { + HDMI_COLORSPACE_RGB, + HDMI_COLORSPACE_YUV422, + HDMI_COLORSPACE_YUV444, +}; + +enum hdmi_scan_mode { + HDMI_SCAN_MODE_NONE, + HDMI_SCAN_MODE_OVERSCAN, + HDMI_SCAN_MODE_UNDERSCAN, +}; + +enum hdmi_colorimetry { + HDMI_COLORIMETRY_NONE, + HDMI_COLORIMETRY_ITU_601, + HDMI_COLORIMETRY_ITU_709, + HDMI_COLORIMETRY_EXTENDED, +}; + +enum hdmi_picture_aspect { + HDMI_PICTURE_ASPECT_NONE, + HDMI_PICTURE_ASPECT_4_3, + HDMI_PICTURE_ASPECT_16_9, +}; + +enum hdmi_active_aspect { + HDMI_ACTIVE_ASPECT_16_9_TOP = 2, + HDMI_ACTIVE_ASPECT_14_9_TOP = 3, + HDMI_ACTIVE_ASPECT_16_9_CENTER = 4, + HDMI_ACTIVE_ASPECT_PICTURE = 8, + HDMI_ACTIVE_ASPECT_4_3 = 9, + HDMI_ACTIVE_ASPECT_16_9 = 10, + HDMI_ACTIVE_ASPECT_14_9 = 11, + HDMI_ACTIVE_ASPECT_4_3_SP_14_9 = 13, + HDMI_ACTIVE_ASPECT_16_9_SP_14_9 = 14, + HDMI_ACTIVE_ASPECT_16_9_SP_4_3 = 15, +}; + +enum hdmi_extended_colorimetry { + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601, + HDMI_EXTENDED_COLORIMETRY_XV_YCC_709, + HDMI_EXTENDED_COLORIMETRY_S_YCC_601, + HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601, + HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB, +}; + +enum hdmi_quantization_range { + HDMI_QUANTIZATION_RANGE_DEFAULT, + HDMI_QUANTIZATION_RANGE_LIMITED, + HDMI_QUANTIZATION_RANGE_FULL, +}; + +/* non-uniform picture scaling */ +enum hdmi_nups { + HDMI_NUPS_UNKNOWN, + HDMI_NUPS_HORIZONTAL, + HDMI_NUPS_VERTICAL, + HDMI_NUPS_BOTH, +}; + +enum hdmi_ycc_quantization_range { + HDMI_YCC_QUANTIZATION_RANGE_LIMITED, + HDMI_YCC_QUANTIZATION_RANGE_FULL, +}; + +enum hdmi_content_type { + HDMI_CONTENT_TYPE_NONE, + HDMI_CONTENT_TYPE_PHOTO, + HDMI_CONTENT_TYPE_CINEMA, + HDMI_CONTENT_TYPE_GAME, +}; + +struct hdmi_avi_infoframe { + enum hdmi_infoframe_type type; + unsigned char version; + unsigned char length; + enum hdmi_colorspace colorspace; + bool active_info_valid; + bool horizontal_bar_valid; + bool vertical_bar_valid; + enum hdmi_scan_mode scan_mode; + enum hdmi_colorimetry colorimetry; + enum hdmi_picture_aspect picture_aspect; + enum hdmi_active_aspect active_aspect; + bool itc; + enum hdmi_extended_colorimetry extended_colorimetry; + enum hdmi_quantization_range quantization_range; + enum hdmi_nups nups; + unsigned char video_code; + enum hdmi_ycc_quantization_range ycc_quantization_range; + enum hdmi_content_type content_type; + unsigned char pixel_repeat; + unsigned short top_bar; + unsigned short bottom_bar; + unsigned short left_bar; + unsigned short right_bar; +}; + +int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame); +ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, + size_t size); + +enum hdmi_spd_sdi { + HDMI_SPD_SDI_UNKNOWN, + HDMI_SPD_SDI_DSTB, + HDMI_SPD_SDI_DVDP, + HDMI_SPD_SDI_DVHS, + HDMI_SPD_SDI_HDDVR, + HDMI_SPD_SDI_DVC, + HDMI_SPD_SDI_DSC, + HDMI_SPD_SDI_VCD, + HDMI_SPD_SDI_GAME, + HDMI_SPD_SDI_PC, + HDMI_SPD_SDI_BD, + HDMI_SPD_SDI_SACD, + HDMI_SPD_SDI_HDDVD, + HDMI_SPD_SDI_PMP, +}; + +struct hdmi_spd_infoframe { + enum hdmi_infoframe_type type; + unsigned char version; + unsigned char length; + char vendor[8]; + char product[16]; + enum hdmi_spd_sdi sdi; +}; + +int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame, + const char *vendor, const char *product); +ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer, + size_t size); + +enum hdmi_audio_coding_type { + HDMI_AUDIO_CODING_TYPE_STREAM, + HDMI_AUDIO_CODING_TYPE_PCM, + HDMI_AUDIO_CODING_TYPE_AC3, + HDMI_AUDIO_CODING_TYPE_MPEG1, + HDMI_AUDIO_CODING_TYPE_MP3, + HDMI_AUDIO_CODING_TYPE_MPEG2, + HDMI_AUDIO_CODING_TYPE_AAC_LC, + HDMI_AUDIO_CODING_TYPE_DTS, + HDMI_AUDIO_CODING_TYPE_ATRAC, + HDMI_AUDIO_CODING_TYPE_DSD, + HDMI_AUDIO_CODING_TYPE_EAC3, + HDMI_AUDIO_CODING_TYPE_DTS_HD, + HDMI_AUDIO_CODING_TYPE_MLP, + HDMI_AUDIO_CODING_TYPE_DST, + HDMI_AUDIO_CODING_TYPE_WMA_PRO, +}; + +enum hdmi_audio_sample_size { + HDMI_AUDIO_SAMPLE_SIZE_STREAM, + HDMI_AUDIO_SAMPLE_SIZE_16, + HDMI_AUDIO_SAMPLE_SIZE_20, + HDMI_AUDIO_SAMPLE_SIZE_24, +}; + +enum hdmi_audio_sample_frequency { + HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM, + HDMI_AUDIO_SAMPLE_FREQUENCY_32000, + HDMI_AUDIO_SAMPLE_FREQUENCY_44100, + HDMI_AUDIO_SAMPLE_FREQUENCY_48000, + HDMI_AUDIO_SAMPLE_FREQUENCY_88200, + HDMI_AUDIO_SAMPLE_FREQUENCY_96000, + HDMI_AUDIO_SAMPLE_FREQUENCY_176400, + HDMI_AUDIO_SAMPLE_FREQUENCY_192000, +}; + +enum hdmi_audio_coding_type_ext { + HDMI_AUDIO_CODING_TYPE_EXT_STREAM, + HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC, + HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC_V2, + HDMI_AUDIO_CODING_TYPE_EXT_MPEG_SURROUND, +}; + +struct hdmi_audio_infoframe { + enum hdmi_infoframe_type type; + unsigned char version; + unsigned char length; + unsigned char channels; + enum hdmi_audio_coding_type coding_type; + enum hdmi_audio_sample_size sample_size; + enum hdmi_audio_sample_frequency sample_frequency; + enum hdmi_audio_coding_type_ext coding_type_ext; + unsigned char channel_allocation; + unsigned char level_shift_value; + bool downmix_inhibit; + +}; + +int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame); +ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, + void *buffer, size_t size); + +struct hdmi_vendor_infoframe { + enum hdmi_infoframe_type type; + unsigned char version; + unsigned char length; + u8 data[27]; +}; + +ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame, + void *buffer, size_t size); + +#endif /* _DRM_HDMI_H */ -- cgit v1.2.3 From 24dea0c9feccf699749f860fa2f4ccd84d30390d Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 31 Jan 2013 21:06:34 +0400 Subject: mtd: map: BUG() in non handled cases Several map-related functions look like a serie of ifs, checking widths of map. Those functions do not have any handling for default case. Instead of fiddling with uninitialized_var in those functions, let's just add a (correct) BUG() to the default case on those maps. This will also allow us to catch potential errors in maps setup in future. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 8b9bfd7dcaa3..4b02512e421c 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -329,7 +329,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word static inline map_word map_word_load(struct map_info *map, const void *ptr) { - map_word r = {{0} }; + map_word r; if (map_bankwidth_is_1(map)) r.x[0] = *(unsigned char *)ptr; @@ -343,6 +343,8 @@ static inline map_word map_word_load(struct map_info *map, const void *ptr) #endif else if (map_bankwidth_is_large(map)) memcpy(r.x, ptr, map->bankwidth); + else + BUG(); return r; } @@ -392,7 +394,7 @@ static inline map_word map_word_ff(struct map_info *map) static inline map_word inline_map_read(struct map_info *map, unsigned long ofs) { - map_word uninitialized_var(r); + map_word r; if (map_bankwidth_is_1(map)) r.x[0] = __raw_readb(map->virt + ofs); @@ -426,6 +428,8 @@ static inline void inline_map_write(struct map_info *map, const map_word datum, #endif else if (map_bankwidth_is_large(map)) memcpy_toio(map->virt+ofs, datum.x, map->bankwidth); + else + BUG(); mb(); } -- cgit v1.2.3 From 45ebd3945b2a3cf4eb89d5fb0090a3cb71af7973 Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Wed, 20 Feb 2013 09:19:09 -0600 Subject: sched: Move RR_TIMESLICE from sysctl.h to rt.h This fixes an ia64 build bug reported by Tony Luck. Reported-by: Tony Luck Signed-off-by: Clark Williams Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1361373550-4011-2-git-send-email-clark.williams@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 6 ++++++ include/linux/sched/sysctl.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 94e19ea28fc3..440434df3627 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -55,4 +55,10 @@ static inline bool tsk_is_pi_blocked(struct task_struct *tsk) extern void normalize_rt_tasks(void); +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + #endif /* _SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d2bb0ae979d0..bf8086b2506e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -91,12 +91,6 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice; extern unsigned int sysctl_sched_autogroup_enabled; #endif -/* - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. - */ -#define RR_TIMESLICE (100 * HZ / 1000) - extern int sched_rr_timeslice; extern int sched_rr_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From bc681593b588786e6326b3e5f78ccc1683e2269c Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Fri, 22 Feb 2013 09:20:11 -0800 Subject: sched: move RR_TIMESLICE from sysctl.h to rt.h Originally submitted by Clark Williams as part of a cleanup, but happens also to fix an ia64 build problem: arch/ia64/kernel/init_task.c:38: error: 'RR_TIMESLICE' undeclared here (not in a function) Signed-off-by: Clark Williams Signed-off-by: Tony Luck --- include/linux/sched/rt.h | 6 ++++++ include/linux/sched/sysctl.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 94e19ea28fc3..440434df3627 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -55,4 +55,10 @@ static inline bool tsk_is_pi_blocked(struct task_struct *tsk) extern void normalize_rt_tasks(void); +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + #endif /* _SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d2bb0ae979d0..bf8086b2506e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -91,12 +91,6 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice; extern unsigned int sysctl_sched_autogroup_enabled; #endif -/* - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. - */ -#define RR_TIMESLICE (100 * HZ / 1000) - extern int sched_rr_timeslice; extern int sched_rr_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From d6561bb206aae9de8eee4516549339ee96386b87 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 21 Feb 2013 11:04:45 +0000 Subject: PM / OPP: fix condition for empty of_init_opp_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit randconfig build reports the following error which is caused by CONFIG_PM_OPP being unset. CC arch/arm/mach-imx/mach-imx6q.o arch/arm/mach-imx/mach-imx6q.c: In function ‘imx6q_opp_init’: arch/arm/mach-imx/mach-imx6q.c:248:2: error: implicit declaration of function ‘of_init_opp_table’ [-Werror=implicit-function-declaration] Fix the error by giving a more correct condition for empty of_init_opp_table() implementation. Reported-by: Rob Herring Signed-off-by: Shawn Guo Acked-by: Nishanth Menon Signed-off-by: Rafael J. Wysocki --- include/linux/opp.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/opp.h b/include/linux/opp.h index 214e0ebcb84d..3aca2b8def33 100644 --- a/include/linux/opp.h +++ b/include/linux/opp.h @@ -47,15 +47,6 @@ int opp_enable(struct device *dev, unsigned long freq); int opp_disable(struct device *dev, unsigned long freq); struct srcu_notifier_head *opp_get_notifier(struct device *dev); - -#ifdef CONFIG_OF -int of_init_opp_table(struct device *dev); -#else -static inline int of_init_opp_table(struct device *dev) -{ - return -EINVAL; -} -#endif /* CONFIG_OF */ #else static inline unsigned long opp_get_voltage(struct opp *opp) { @@ -112,6 +103,15 @@ static inline struct srcu_notifier_head *opp_get_notifier(struct device *dev) } #endif /* CONFIG_PM_OPP */ +#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) +int of_init_opp_table(struct device *dev); +#else +static inline int of_init_opp_table(struct device *dev) +{ + return -EINVAL; +} +#endif + #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) int opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); -- cgit v1.2.3 From 496ad9aa8ef448058e36ca7a787c61f2e63f0f54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Jan 2013 17:07:38 -0500 Subject: new helper: file_inode(file) Signed-off-by: Al Viro --- arch/alpha/kernel/srm_env.c | 2 +- arch/blackfin/kernel/cplbinfo.c | 2 +- arch/cris/arch-v10/drivers/sync_serial.c | 8 ++-- arch/cris/arch-v32/drivers/cryptocop.c | 3 +- arch/cris/arch-v32/drivers/sync_serial.c | 8 ++-- arch/ia64/kernel/salinfo.c | 6 +-- arch/mips/kernel/rtlx.c | 13 ++----- arch/mips/kernel/vpe.c | 2 +- arch/mips/lasat/picvue_proc.c | 2 +- arch/powerpc/kernel/proc_powerpc.c | 6 +-- arch/powerpc/kernel/rtas_flash.c | 16 ++++---- arch/powerpc/platforms/cell/spufs/coredump.c | 4 +- arch/powerpc/platforms/cell/spufs/file.c | 6 +-- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +- arch/powerpc/platforms/pseries/scanlog.c | 8 +--- arch/s390/hypfs/hypfs_dbfs.c | 2 +- arch/s390/hypfs/inode.c | 2 +- arch/s390/kernel/debug.c | 2 +- arch/s390/pci/pci_debug.c | 4 +- arch/sh/mm/alignment.c | 2 +- arch/x86/ia32/ia32_aout.c | 6 +-- arch/x86/kernel/cpuid.c | 4 +- drivers/block/DAC960.c | 2 +- drivers/block/nbd.c | 2 +- drivers/char/dsp56k.c | 8 ++-- drivers/char/dtlk.c | 4 +- drivers/char/lp.c | 8 ++-- drivers/char/mem.c | 4 +- drivers/char/nsc_gpio.c | 4 +- drivers/char/pcmcia/cm4000_cs.c | 2 +- drivers/char/ppdev.c | 6 +-- drivers/char/ps3flash.c | 2 +- drivers/char/raw.c | 2 +- drivers/char/sonypi.c | 2 +- drivers/char/tb0219.c | 4 +- drivers/gpu/drm/gma500/gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 10 ++--- drivers/gpu/drm/ttm/ttm_tt.c | 4 +- drivers/gpu/drm/udl/udl_gem.c | 2 +- drivers/hid/hid-roccat.c | 2 +- drivers/hid/hidraw.c | 6 +-- drivers/i2c/i2c-dev.c | 4 +- drivers/ide/ide-proc.c | 4 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/hw/ipath/ipath_file_ops.c | 4 +- drivers/infiniband/hw/ipath/ipath_fs.c | 6 +-- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 4 +- drivers/iommu/tegra-smmu.c | 4 +- drivers/isdn/hardware/eicon/divasproc.c | 6 +-- drivers/isdn/hysdn/hysdn_proclog.c | 4 +- drivers/isdn/i4l/isdn_common.c | 8 ++-- drivers/isdn/i4l/isdn_ppp.c | 2 +- drivers/md/bitmap.c | 4 +- drivers/media/pci/zoran/zoran_procfs.c | 2 +- drivers/media/rc/lirc_dev.c | 14 +++---- drivers/media/v4l2-core/v4l2-dev.c | 2 +- drivers/mtd/ubi/cdev.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/wan/cosa.c | 4 +- drivers/net/wireless/ray_cs.c | 2 +- drivers/parisc/led.c | 2 +- drivers/pci/proc.c | 10 ++--- drivers/platform/x86/sony-laptop.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- drivers/platform/x86/toshiba_acpi.c | 8 ++-- drivers/pnp/isapnp/proc.c | 4 +- drivers/pnp/pnpbios/proc.c | 2 +- drivers/s390/char/fs3270.c | 4 +- drivers/s390/char/tape_char.c | 8 ++-- drivers/s390/char/vmur.c | 2 +- drivers/s390/cio/qdio_debug.c | 4 +- drivers/sbus/char/display7seg.c | 2 +- drivers/scsi/3w-9xxx.c | 2 +- drivers/scsi/3w-sas.c | 2 +- drivers/scsi/3w-xxxx.c | 2 +- drivers/scsi/csiostor/csio_init.c | 2 +- drivers/scsi/dpt_i2o.c | 4 +- drivers/scsi/st.c | 2 +- drivers/staging/bcm/Misc.c | 2 +- drivers/staging/ccg/f_mass_storage.c | 2 +- drivers/staging/ccg/rndis.c | 2 +- drivers/staging/ccg/storage_common.c | 2 +- drivers/staging/dgrp/dgrp_specproc.c | 4 +- drivers/staging/omapdrm/omap_gem_helpers.c | 2 +- drivers/staging/usbip/usbip_common.c | 2 +- drivers/staging/vme/devices/vme_user.c | 8 ++-- drivers/target/target_core_file.c | 2 +- drivers/tty/vt/vc_screen.c | 8 ++-- drivers/usb/core/devices.c | 4 +- drivers/usb/core/devio.c | 6 +-- drivers/usb/gadget/atmel_usba_udc.c | 8 ++-- drivers/usb/gadget/f_mass_storage.c | 2 +- drivers/usb/gadget/printer.c | 2 +- drivers/usb/gadget/rndis.c | 2 +- drivers/usb/gadget/storage_common.c | 2 +- drivers/video/fb_defio.c | 2 +- drivers/video/fbmem.c | 2 +- drivers/video/msm/mdp.c | 2 +- drivers/watchdog/cpwd.c | 4 +- drivers/zorro/proc.c | 4 +- fs/9p/vfs_file.c | 10 ++--- fs/adfs/dir.c | 2 +- fs/affs/dir.c | 2 +- fs/afs/dir.c | 4 +- fs/afs/flock.c | 4 +- fs/afs/write.c | 7 ++-- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/dev-ioctl.c | 2 +- fs/autofs4/root.c | 4 +- fs/befs/linuxvfs.c | 2 +- fs/bfs/dir.c | 2 +- fs/binfmt_aout.c | 4 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 4 +- fs/binfmt_flat.c | 2 +- fs/binfmt_misc.c | 4 +- fs/block_dev.c | 2 +- fs/btrfs/file.c | 8 ++-- fs/btrfs/inode.c | 4 +- fs/btrfs/ioctl.c | 52 ++++++++++++------------- fs/btrfs/send.c | 2 +- fs/buffer.c | 4 +- fs/ceph/addr.c | 12 +++--- fs/ceph/dir.c | 6 +-- fs/ceph/file.c | 10 ++--- fs/ceph/ioctl.c | 16 ++++---- fs/ceph/locks.c | 2 +- fs/cifs/cifsfs.c | 6 +-- fs/cifs/file.c | 26 ++++++------- fs/cifs/inode.c | 8 ++-- fs/cifs/ioctl.c | 2 +- fs/cifs/readdir.c | 4 +- fs/coda/dir.c | 2 +- fs/coda/file.c | 12 +++--- fs/coda/inode.c | 2 +- fs/coda/pioctl.c | 2 +- fs/compat_ioctl.c | 2 +- fs/configfs/dir.c | 2 +- fs/coredump.c | 4 +- fs/cramfs/inode.c | 2 +- fs/ecryptfs/file.c | 4 +- fs/efs/dir.c | 2 +- fs/exec.c | 8 ++-- fs/exofs/dir.c | 2 +- fs/ext2/dir.c | 2 +- fs/ext2/ioctl.c | 2 +- fs/ext3/dir.c | 8 ++-- fs/ext3/ioctl.c | 2 +- fs/ext3/namei.c | 4 +- fs/ext4/dir.c | 8 ++-- fs/ext4/extents.c | 4 +- fs/ext4/file.c | 2 +- fs/ext4/inline.c | 2 +- fs/ext4/inode.c | 6 +-- fs/ext4/ioctl.c | 2 +- fs/ext4/move_extent.c | 6 +-- fs/ext4/namei.c | 2 +- fs/ext4/super.c | 2 +- fs/f2fs/dir.c | 2 +- fs/fat/dir.c | 6 +-- fs/fat/file.c | 4 +- fs/fcntl.c | 2 +- fs/file_table.c | 2 +- fs/freevxfs/vxfs_lookup.c | 2 +- fs/fuse/control.c | 2 +- fs/fuse/dir.c | 2 +- fs/gfs2/file.c | 17 ++++---- fs/gfs2/rgrp.c | 2 +- fs/hfs/dir.c | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hfsplus/ioctl.c | 4 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/dir.c | 4 +- fs/hpfs/file.c | 2 +- fs/hppfs/hppfs.c | 8 ++-- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 2 +- fs/ioctl.c | 12 +++--- fs/isofs/compress.c | 2 +- fs/isofs/dir.c | 2 +- fs/jffs2/dir.c | 4 +- fs/jfs/ioctl.c | 2 +- fs/jfs/jfs_dtree.c | 2 +- fs/lockd/clntlock.c | 2 +- fs/lockd/clntproc.c | 2 +- fs/lockd/svclock.c | 16 ++++---- fs/lockd/svcsubs.c | 2 +- fs/locks.c | 24 ++++++------ fs/logfs/dir.c | 4 +- fs/logfs/file.c | 2 +- fs/minix/dir.c | 2 +- fs/namei.c | 2 +- fs/namespace.c | 2 +- fs/ncpfs/inode.c | 4 +- fs/ncpfs/ioctl.c | 2 +- fs/ncpfs/mmap.c | 2 +- fs/nfs/dir.c | 8 ++-- fs/nfs/file.c | 2 +- fs/nfs/idmap.c | 2 +- fs/nfs/inode.c | 4 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4file.c | 2 +- fs/nfs/proc.c | 2 +- fs/nfsd/fault_inject.c | 6 +-- fs/nfsd/nfsctl.c | 2 +- fs/nfsd/vfs.c | 6 +-- fs/nilfs2/dir.c | 2 +- fs/nilfs2/file.c | 2 +- fs/nilfs2/ioctl.c | 2 +- fs/notify/dnotify/dnotify.c | 4 +- fs/notify/fanotify/fanotify_user.c | 2 +- fs/ntfs/dir.c | 2 +- fs/ocfs2/aops.c | 4 +- fs/ocfs2/dir.c | 2 +- fs/ocfs2/dlmfs/dlmfs.c | 6 +-- fs/ocfs2/file.c | 10 ++--- fs/ocfs2/ioctl.c | 4 +- fs/ocfs2/mmap.c | 8 ++-- fs/ocfs2/move_extents.c | 2 +- fs/ocfs2/refcounttree.c | 4 +- fs/omfs/dir.c | 4 +- fs/open.c | 6 +-- fs/openpromfs/inode.c | 2 +- fs/pipe.c | 16 ++++---- fs/proc/base.c | 38 +++++++++--------- fs/proc/generic.c | 10 ++--- fs/proc/inode.c | 14 +++---- fs/proc/nommu.c | 2 +- fs/proc/proc_net.c | 2 +- fs/proc/proc_sysctl.c | 4 +- fs/proc/task_mmu.c | 6 +-- fs/proc/task_nommu.c | 2 +- fs/qnx4/dir.c | 2 +- fs/qnx6/dir.c | 2 +- fs/ramfs/file-nommu.c | 2 +- fs/read_write.c | 8 ++-- fs/readdir.c | 2 +- fs/reiserfs/file.c | 2 +- fs/reiserfs/ioctl.c | 2 +- fs/reiserfs/procfs.c | 2 +- fs/romfs/super.c | 2 +- fs/splice.c | 2 +- fs/squashfs/dir.c | 2 +- fs/sync.c | 2 +- fs/sysfs/bin.c | 6 +-- fs/sysv/dir.c | 2 +- fs/ubifs/dir.c | 2 +- fs/ubifs/file.c | 2 +- fs/ubifs/ioctl.c | 2 +- fs/udf/dir.c | 2 +- fs/udf/file.c | 4 +- fs/ufs/dir.c | 2 +- fs/xfs/xfs_dfrag.c | 8 ++-- fs/xfs/xfs_file.c | 4 +- fs/xfs/xfs_ioctl.c | 6 +-- fs/xfs/xfs_ioctl32.c | 2 +- include/linux/fs.h | 9 ++++- include/linux/fsnotify.h | 2 +- include/linux/hugetlb.h | 2 +- include/linux/lockd/lockd.h | 2 +- ipc/mqueue.c | 16 ++++---- ipc/shm.c | 8 ++-- kernel/acct.c | 2 +- kernel/cgroup.c | 6 +-- kernel/events/core.c | 2 +- kernel/fork.c | 2 +- kernel/irq/proc.c | 2 +- kernel/nsproxy.c | 2 +- kernel/relay.c | 4 +- kernel/sys.c | 8 ++-- mm/fadvise.c | 2 +- mm/filemap.c | 2 +- mm/hugetlb.c | 4 +- mm/mmap.c | 8 ++-- mm/nommu.c | 12 +++--- mm/shmem.c | 12 +++--- mm/swapfile.c | 2 +- net/atm/proc.c | 2 +- net/core/net_namespace.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netlink/af_netlink.c | 2 +- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/cache.c | 28 ++++++------- net/sunrpc/rpc_pipe.c | 10 ++--- net/unix/garbage.c | 2 +- security/apparmor/domain.c | 4 +- security/apparmor/file.c | 4 +- security/apparmor/lsm.c | 6 +-- security/integrity/ima/ima_api.c | 6 +-- security/integrity/ima/ima_crypto.c | 2 +- security/integrity/ima/ima_main.c | 4 +- security/selinux/hooks.c | 10 ++--- security/selinux/selinuxfs.c | 20 ++++------ security/smack/smack_lsm.c | 14 ++----- security/tomoyo/securityfs_if.c | 2 +- sound/core/info.c | 2 +- sound/core/pcm_native.c | 2 +- sound/oss/msnd_pinnacle.c | 6 +-- sound/oss/soundcard.c | 10 ++--- sound/sound_firmware.c | 2 +- 306 files changed, 696 insertions(+), 717 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index b9fc6c309d2e..e64559f0a82d 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -111,7 +111,7 @@ static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { int res; - srm_env_t *entry = PDE(file->f_path.dentry->d_inode)->data; + srm_env_t *entry = PDE(file_inode(file))->data; char *buf = (char *) __get_free_page(GFP_USER); unsigned long ret1, ret2; diff --git a/arch/blackfin/kernel/cplbinfo.c b/arch/blackfin/kernel/cplbinfo.c index 0bdaa517a501..e1d0b24c6070 100644 --- a/arch/blackfin/kernel/cplbinfo.c +++ b/arch/blackfin/kernel/cplbinfo.c @@ -116,7 +116,7 @@ static const struct seq_operations cplbinfo_sops = { static int cplbinfo_open(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); char cplb_type; unsigned int cpu; int ret; diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index c4b71710fb0e..a1c498d18d31 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -654,7 +654,7 @@ static int sync_serial_release(struct inode *inode, struct file *file) static unsigned int sync_serial_poll(struct file *file, poll_table *wait) { - int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int dev = MINOR(file_inode(file)->i_rdev); unsigned int mask = 0; struct sync_port *port; DEBUGPOLL(static unsigned int prev_mask = 0); @@ -685,7 +685,7 @@ static int sync_serial_ioctl_unlocked(struct file *file, int return_val = 0; unsigned long flags; - int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int dev = MINOR(file_inode(file)->i_rdev); struct sync_port *port; if (dev < 0 || dev >= NUMBER_OF_PORTS || !ports[dev].enabled) { @@ -973,7 +973,7 @@ static long sync_serial_ioctl(struct file *file, static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { - int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int dev = MINOR(file_inode(file)->i_rdev); DECLARE_WAITQUEUE(wait, current); struct sync_port *port; unsigned long flags; @@ -1097,7 +1097,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, static ssize_t sync_serial_read(struct file *file, char *buf, size_t count, loff_t *ppos) { - int dev = MINOR(file->f_dentry->d_inode->i_rdev); + int dev = MINOR(file_inode(file)->i_rdev); int avail; struct sync_port *port; unsigned char *start; diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index f8476d9e856b..877da1908234 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -3135,11 +3135,10 @@ static long cryptocop_ioctl_unlocked(struct inode *inode, static long cryptocop_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; long ret; mutex_lock(&cryptocop_mutex); - ret = cryptocop_ioctl_unlocked(inode, filp, cmd, arg); + ret = cryptocop_ioctl_unlocked(file_inode(filp), filp, cmd, arg); mutex_unlock(&cryptocop_mutex); return ret; diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index a6a180bc566f..219f704e3221 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -609,7 +609,7 @@ static int sync_serial_release(struct inode *inode, struct file *file) static unsigned int sync_serial_poll(struct file *file, poll_table *wait) { - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); unsigned int mask = 0; sync_port *port; DEBUGPOLL( static unsigned int prev_mask = 0; ); @@ -657,7 +657,7 @@ static int sync_serial_ioctl(struct file *file, { int return_val = 0; int dma_w_size = regk_dma_set_w_size1; - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); sync_port *port; reg_sser_rw_tr_cfg tr_cfg; reg_sser_rw_rec_cfg rec_cfg; @@ -979,7 +979,7 @@ static long sync_serial_ioctl(struct file *file, static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); DECLARE_WAITQUEUE(wait, current); struct sync_port *port; int trunc_count; @@ -1102,7 +1102,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf, static ssize_t sync_serial_read(struct file * file, char * buf, size_t count, loff_t *ppos) { - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); int avail; sync_port *port; unsigned char* start; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 79802e540e53..aa527d7e91f2 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -301,7 +301,7 @@ salinfo_event_open(struct inode *inode, struct file *file) static ssize_t salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; @@ -463,7 +463,7 @@ retry: static ssize_t salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; u8 *buf; @@ -524,7 +524,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) static ssize_t salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index b8c18dcdd2c4..88f7b50d541c 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -399,11 +399,9 @@ static int file_release(struct inode *inode, struct file *filp) static unsigned int file_poll(struct file *file, poll_table * wait) { - int minor; + int minor = iminor(file_inode(file)); unsigned int mask = 0; - minor = iminor(file->f_path.dentry->d_inode); - poll_wait(file, &channel_wqs[minor].rt_queue, wait); poll_wait(file, &channel_wqs[minor].lx_queue, wait); @@ -424,7 +422,7 @@ static unsigned int file_poll(struct file *file, poll_table * wait) static ssize_t file_read(struct file *file, char __user * buffer, size_t count, loff_t * ppos) { - int minor = iminor(file->f_path.dentry->d_inode); + int minor = iminor(file_inode(file)); /* data available? */ if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1)) { @@ -437,11 +435,8 @@ static ssize_t file_read(struct file *file, char __user * buffer, size_t count, static ssize_t file_write(struct file *file, const char __user * buffer, size_t count, loff_t * ppos) { - int minor; - struct rtlx_channel *rt; - - minor = iminor(file->f_path.dentry->d_inode); - rt = &rtlx->channel[minor]; + int minor = iminor(file_inode(file)); + struct rtlx_channel *rt = &rtlx->channel[minor]; /* any space left... */ if (!rtlx_write_poll(minor)) { diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index eec690af6581..d75a5289d9b3 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1149,7 +1149,7 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer, size_t ret = count; struct vpe *v; - if (iminor(file->f_path.dentry->d_inode) != minor) + if (iminor(file_inode(file)) != minor) return -ENODEV; v = get_vpe(tclimit); diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index 8e388da1926f..c592bc8b8c99 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -64,7 +64,7 @@ static int pvc_line_proc_open(struct inode *inode, struct file *file) static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - int lineno = *(int *)PDE(file->f_path.dentry->d_inode)->data; + int lineno = *(int *)PDE(file_inode(file))->data; char kbuf[PVC_LINELEN]; size_t len; diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index c8ae3714e79b..f19d0bdc3241 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -32,7 +32,7 @@ static loff_t page_map_seek( struct file *file, loff_t off, int whence) { loff_t new; - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); switch(whence) { case 0: @@ -55,13 +55,13 @@ static loff_t page_map_seek( struct file *file, loff_t off, int whence) static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); } static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); if ((vma->vm_end - vma->vm_start) > dp->size) return -EINVAL; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 8329190312c1..c642f0132988 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -191,7 +191,7 @@ static void free_flash_list(struct flash_block_list *f) static int rtas_flash_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; uf = (struct rtas_update_flash_t *) dp->data; @@ -253,7 +253,7 @@ static void get_flash_status_msg(int status, char *buf) static ssize_t rtas_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; char msg[RTAS_MSG_MAXLEN]; @@ -282,7 +282,7 @@ void rtas_block_ctor(void *ptr) static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_update_flash_t *uf; char *p; int next_free; @@ -374,7 +374,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf) static ssize_t manage_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_manage_flash_t *args_buf; char msg[RTAS_MSG_MAXLEN]; int msglen; @@ -391,7 +391,7 @@ static ssize_t manage_flash_read(struct file *file, char __user *buf, static ssize_t manage_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_manage_flash_t *args_buf; const char reject_str[] = "0"; const char commit_str[] = "1"; @@ -462,7 +462,7 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, static ssize_t validate_flash_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; char msg[RTAS_MSG_MAXLEN]; int msglen; @@ -477,7 +477,7 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, static ssize_t validate_flash_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; int rc; @@ -526,7 +526,7 @@ done: static int validate_flash_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *dp = PDE(file_inode(file)); struct rtas_validate_flash_t *args_buf; args_buf = (struct rtas_validate_flash_t *) dp->data; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 657e3f233a64..c9500ea7be2f 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -111,7 +111,7 @@ static int match_context(const void *v, struct file *file, unsigned fd) struct spu_context *ctx; if (file->f_op != &spufs_context_fops) return 0; - ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; + ctx = SPUFS_I(file_inode(file))->i_ctx; if (ctx->flags & SPU_CREATE_NOSCHED) return 0; return fd + 1; @@ -137,7 +137,7 @@ static struct spu_context *coredump_next_context(int *fd) return NULL; *fd = n - 1; file = fcheck(*fd); - return SPUFS_I(file->f_dentry->d_inode)->i_ctx; + return SPUFS_I(file_inode(file))->i_ctx; } int spufs_coredump_extra_notes_size(void) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 0cfece4cf6ef..68c57d38745a 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1852,7 +1852,7 @@ out: static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int err = filemap_write_and_wait_range(inode->i_mapping, start, end); if (!err) { mutex_lock(&inode->i_mutex); @@ -2501,7 +2501,7 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct spu_context *ctx = SPUFS_I(inode)->i_ctx; int error = 0, cnt = 0; @@ -2571,7 +2571,7 @@ static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct spu_context *ctx = SPUFS_I(inode)->i_ctx; unsigned int mask = 0; int rc; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index dba1ce235da5..99db6161e5c9 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -368,7 +368,7 @@ spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, return ERR_PTR(-EINVAL); neighbor = get_spu_context( - SPUFS_I(filp->f_dentry->d_inode)->i_ctx); + SPUFS_I(file_inode(filp))->i_ctx); if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index baee994fe810..b045fdda4845 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp, if (filp->f_op != &spufs_context_fops) goto out; - i = SPUFS_I(filp->f_path.dentry->d_inode); + i = SPUFS_I(file_inode(filp)); ret = spufs_run_spu(i->i_ctx, &npc, &status); if (put_user(npc, unpc)) diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index c9311cfdfcac..cf4e7736e4f1 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -86,7 +86,7 @@ static int hcall_inst_seq_open(struct inode *inode, struct file *file) rc = seq_open(file, &hcall_inst_seq_ops); seq = file->private_data; - seq->private = file->f_path.dentry->d_inode->i_private; + seq->private = file_inode(file)->i_private; return rc; } diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 554457294a2b..47f3cda2a68b 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -46,16 +46,12 @@ static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ static ssize_t scanlog_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; - struct proc_dir_entry *dp; - unsigned int *data; + struct proc_dir_entry *dp = PDE(file_inode(file)); + unsigned int *data = (unsigned int *)dp->data; int status; unsigned long len, off; unsigned int wait_time; - dp = PDE(inode); - data = (unsigned int *)dp->data; - if (count > RTAS_DATA_BUF_SIZE) count = RTAS_DATA_BUF_SIZE; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 13e76dabbe8b..9fd4a40c6752 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -54,7 +54,7 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, if (*ppos != 0) return 0; - df = file->f_path.dentry->d_inode->i_private; + df = file_inode(file)->i_private; mutex_lock(&df->lock); if (!df->data) { data = hypfs_dbfs_data_alloc(df); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 06ea69bd387a..280ded8b79ba 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -119,7 +119,7 @@ static void hypfs_evict_inode(struct inode *inode) static int hypfs_open(struct inode *inode, struct file *filp) { - char *data = filp->f_path.dentry->d_inode->i_private; + char *data = file_inode(filp)->i_private; struct hypfs_sb_info *fs_info; if (filp->f_mode & FMODE_WRITE) { diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4e8215e0d4b6..19dcf136b851 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -611,7 +611,7 @@ debug_open(struct inode *inode, struct file *file) debug_info_t *debug_info, *debug_info_snapshot; mutex_lock(&debug_mutex); - debug_info = file->f_path.dentry->d_inode->i_private; + debug_info = file_inode(file)->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index a303c95346cb..a5d07bc2a547 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -99,7 +99,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, static int pci_perf_seq_open(struct inode *inode, struct file *filp) { return single_open(filp, pci_perf_show, - filp->f_path.dentry->d_inode->i_private); + file_inode(filp)->i_private); } static const struct file_operations debugfs_pci_perf_fops = { @@ -121,7 +121,7 @@ static int pci_debug_show(struct seq_file *m, void *v) static int pci_debug_seq_open(struct inode *inode, struct file *filp) { return single_open(filp, pci_debug_show, - filp->f_path.dentry->d_inode->i_private); + file_inode(filp)->i_private); } static const struct file_operations debugfs_pci_debug_fops = { diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c index 620fa7ff9eec..aea14855e656 100644 --- a/arch/sh/mm/alignment.c +++ b/arch/sh/mm/alignment.c @@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file) static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - int *data = PDE(file->f_path.dentry->d_inode)->data; + int *data = PDE(file_inode(file))->data; char mode; if (count > 0) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index a703af19c281..03abf9b70011 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -271,7 +271,7 @@ static int load_aout_binary(struct linux_binprm *bprm) if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < + i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -425,12 +425,10 @@ beyond_if: static int load_aout_library(struct file *file) { - struct inode *inode; unsigned long bss, start_addr, len, error; int retval; struct exec ex; - inode = file->f_path.dentry->d_inode; retval = -ENOEXEC; error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); @@ -440,7 +438,7 @@ static int load_aout_library(struct file *file) /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < + i_size_read(file_inode(file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { goto out; } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 60c78917190c..1e4dbcfe6d31 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, { char __user *tmp = buf; struct cpuid_regs cmd; - int cpu = iminor(file->f_path.dentry->d_inode); + int cpu = iminor(file_inode(file)); u64 pos = *ppos; ssize_t bytes = 0; int err = 0; @@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; - cpu = iminor(file->f_path.dentry->d_inode); + cpu = iminor(file_inode(file)); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 9a13e889837e..8f12dc78a848 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -6547,7 +6547,7 @@ static ssize_t dac960_user_command_proc_write(struct file *file, const char __user *Buffer, size_t Count, loff_t *pos) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data; + DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file_inode(file))->data; unsigned char CommandBuffer[80]; int Length; if (Count > sizeof(CommandBuffer)-1) return -EINVAL; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 043ddcca4abf..ade146bf65e5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -625,7 +625,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return -EBUSY; file = fget(arg); if (file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); if (S_ISSOCK(inode->i_mode)) { nbd->file = file; nbd->sock = SOCKET_I(inode); diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c index 052797b32bd3..01a5ca7425d7 100644 --- a/drivers/char/dsp56k.c +++ b/drivers/char/dsp56k.c @@ -181,7 +181,7 @@ static int dsp56k_upload(u_char __user *bin, int len) static ssize_t dsp56k_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int dev = iminor(inode) & 0x0f; switch(dev) @@ -244,7 +244,7 @@ static ssize_t dsp56k_read(struct file *file, char __user *buf, size_t count, static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int dev = iminor(inode) & 0x0f; switch(dev) @@ -306,7 +306,7 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co static long dsp56k_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int dev = iminor(file->f_path.dentry->d_inode) & 0x0f; + int dev = iminor(file_inode(file)) & 0x0f; void __user *argp = (void __user *)arg; switch(dev) @@ -408,7 +408,7 @@ static long dsp56k_ioctl(struct file *file, unsigned int cmd, #if 0 static unsigned int dsp56k_poll(struct file *file, poll_table *wait) { - int dev = iminor(file->f_path.dentry->d_inode) & 0x0f; + int dev = iminor(file_inode(file)) & 0x0f; switch(dev) { diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c index 85156dd0caee..65a8d96c0e93 100644 --- a/drivers/char/dtlk.c +++ b/drivers/char/dtlk.c @@ -125,7 +125,7 @@ static char dtlk_write_tts(char); static ssize_t dtlk_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); char ch; int i = 0, retries; @@ -177,7 +177,7 @@ static ssize_t dtlk_write(struct file *file, const char __user *buf, } #endif - if (iminor(file->f_path.dentry->d_inode) != DTLK_MINOR) + if (iminor(file_inode(file)) != DTLK_MINOR) return -EINVAL; while (1) { diff --git a/drivers/char/lp.c b/drivers/char/lp.c index a741e418b456..dafd9ac6428f 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -294,7 +294,7 @@ static int lp_wait_ready(int minor, int nonblock) static ssize_t lp_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct parport *port = lp_table[minor].dev->port; char *kbuf = lp_table[minor].lp_buffer; ssize_t retv = 0; @@ -413,7 +413,7 @@ static ssize_t lp_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { DEFINE_WAIT(wait); - unsigned int minor=iminor(file->f_path.dentry->d_inode); + unsigned int minor=iminor(file_inode(file)); struct parport *port = lp_table[minor].dev->port; ssize_t retval = 0; char *kbuf = lp_table[minor].lp_buffer; @@ -679,7 +679,7 @@ static long lp_ioctl(struct file *file, unsigned int cmd, struct timeval par_timeout; int ret; - minor = iminor(file->f_path.dentry->d_inode); + minor = iminor(file_inode(file)); mutex_lock(&lp_mutex); switch (cmd) { case LPSETTIMEOUT: @@ -707,7 +707,7 @@ static long lp_compat_ioctl(struct file *file, unsigned int cmd, struct timeval par_timeout; int ret; - minor = iminor(file->f_path.dentry->d_inode); + minor = iminor(file_inode(file)); mutex_lock(&lp_mutex); switch (cmd) { case LPSETTIMEOUT: diff --git a/drivers/char/mem.c b/drivers/char/mem.c index c6fa3bc2baa8..e23b4a247b72 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -708,7 +708,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig) { loff_t ret; - mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + mutex_lock(&file_inode(file)->i_mutex); switch (orig) { case SEEK_CUR: offset += file->f_pos; @@ -725,7 +725,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig) default: ret = -EINVAL; } - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(file)->i_mutex); return ret; } diff --git a/drivers/char/nsc_gpio.c b/drivers/char/nsc_gpio.c index 808d44e9a32a..b07b119ae57f 100644 --- a/drivers/char/nsc_gpio.c +++ b/drivers/char/nsc_gpio.c @@ -41,7 +41,7 @@ void nsc_gpio_dump(struct nsc_gpio_ops *amp, unsigned index) ssize_t nsc_gpio_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { - unsigned m = iminor(file->f_path.dentry->d_inode); + unsigned m = iminor(file_inode(file)); struct nsc_gpio_ops *amp = file->private_data; struct device *dev = amp->dev; size_t i; @@ -104,7 +104,7 @@ ssize_t nsc_gpio_write(struct file *file, const char __user *data, ssize_t nsc_gpio_read(struct file *file, char __user * buf, size_t len, loff_t * ppos) { - unsigned m = iminor(file->f_path.dentry->d_inode); + unsigned m = iminor(file_inode(file)); int value; struct nsc_gpio_ops *amp = file->private_data; diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index a7584860e9a7..c115217c79ae 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -1400,7 +1400,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct cm4000_dev *dev = filp->private_data; unsigned int iobase = dev->p_dev->resource[0]->start; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pcmcia_device *link; int size; int rc; diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 1cd49241e60e..ae0b42b66e55 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -107,7 +107,7 @@ static inline void pp_enable_irq (struct pp_struct *pp) static ssize_t pp_read (struct file * file, char __user * buf, size_t count, loff_t * ppos) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct pp_struct *pp = file->private_data; char * kbuffer; ssize_t bytes_read = 0; @@ -189,7 +189,7 @@ static ssize_t pp_read (struct file * file, char __user * buf, size_t count, static ssize_t pp_write (struct file * file, const char __user * buf, size_t count, loff_t * ppos) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct pp_struct *pp = file->private_data; char * kbuffer; ssize_t bytes_written = 0; @@ -324,7 +324,7 @@ static enum ieee1284_phase init_phase (int mode) static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct pp_struct *pp = file->private_data; struct parport * port; void __user *argp = (void __user *)arg; diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 588063ac9517..8cafa9ccd43f 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -312,7 +312,7 @@ static int ps3flash_flush(struct file *file, fl_owner_t id) static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int err; mutex_lock(&inode->i_mutex); err = ps3flash_writeback(ps3flash_dev); diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 54a3a6d09819..f3223aac4df1 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -80,7 +80,7 @@ static int raw_open(struct inode *inode, struct file *filp) filp->f_flags |= O_DIRECT; filp->f_mapping = bdev->bd_inode->i_mapping; if (++raw_devices[minor].inuse == 1) - filp->f_path.dentry->d_inode->i_mapping = + file_inode(filp)->i_mapping = bdev->bd_inode->i_mapping; filp->private_data = bdev; mutex_unlock(&raw_mutex); diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index d780295a1473..8450e178b819 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -938,7 +938,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, } if (ret > 0) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); inode->i_atime = current_fs_time(inode->i_sb); } diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c index 34c63f85104d..47b9fdfcf083 100644 --- a/drivers/char/tb0219.c +++ b/drivers/char/tb0219.c @@ -164,7 +164,7 @@ static ssize_t tanbac_tb0219_read(struct file *file, char __user *buf, size_t le unsigned int minor; char value; - minor = iminor(file->f_path.dentry->d_inode); + minor = iminor(file_inode(file)); switch (minor) { case 0: value = get_led(); @@ -200,7 +200,7 @@ static ssize_t tanbac_tb0219_write(struct file *file, const char __user *data, int retval = 0; char c; - minor = iminor(file->f_path.dentry->d_inode); + minor = iminor(file_inode(file)); switch (minor) { case 0: type = TYPE_LED; diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c index 04a371aceb34..054e26e769ec 100644 --- a/drivers/gpu/drm/gma500/gtt.c +++ b/drivers/gpu/drm/gma500/gtt.c @@ -202,7 +202,7 @@ static int psb_gtt_attach_pages(struct gtt_range *gt) WARN_ON(gt->pages); /* This is the shared memory object that backs the GEM resource */ - inode = gt->gem.filp->f_path.dentry->d_inode; + inode = file_inode(gt->gem.filp); mapping = inode->i_mapping; gt->pages = kmalloc(pages * sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8febea6daa08..d7d772b30f1a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1635,7 +1635,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) * To do this we must instruct the shmfs to drop all of its * backing pages, *now*. */ - inode = obj->base.filp->f_path.dentry->d_inode; + inode = file_inode(obj->base.filp); shmem_truncate_range(inode, 0, (loff_t)-1); obj->madv = __I915_MADV_PURGED; @@ -1800,7 +1800,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * * Fail silently without starting the shrinker */ - mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + mapping = file_inode(obj->base.filp)->i_mapping; gfp = mapping_gfp_mask(mapping); gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; gfp &= ~(__GFP_IO | __GFP_WAIT); @@ -3724,7 +3724,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, mask |= __GFP_DMA32; } - mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + mapping = file_inode(obj->base.filp)->i_mapping; mapping_set_gfp_mask(mapping, mask); i915_gem_object_init(obj, &i915_gem_object_ops); @@ -4228,7 +4228,7 @@ void i915_gem_free_all_phys_object(struct drm_device *dev) void i915_gem_detach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj) { - struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; char *vaddr; int i; int page_count; @@ -4264,7 +4264,7 @@ i915_gem_attach_phys_object(struct drm_device *dev, int id, int align) { - struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; drm_i915_private_t *dev_priv = dev->dev_private; int ret = 0; int page_count; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7d759a430294..5e93a52d4f2c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -296,7 +296,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm) swap_storage = ttm->swap_storage; BUG_ON(swap_storage == NULL); - swap_space = swap_storage->f_path.dentry->d_inode->i_mapping; + swap_space = file_inode(swap_storage)->i_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = shmem_read_mapping_page(swap_space, i); @@ -345,7 +345,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) } else swap_storage = persistent_swap_storage; - swap_space = swap_storage->f_path.dentry->d_inode->i_mapping; + swap_space = file_inode(swap_storage)->i_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index afd212c99216..3816270ba49b 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -137,7 +137,7 @@ static int udl_gem_get_pages(struct udl_gem_object *obj, gfp_t gfpmask) if (obj->pages == NULL) return -ENOMEM; - inode = obj->base.filp->f_path.dentry->d_inode; + inode = file_inode(obj->base.filp); mapping = inode->i_mapping; gfpmask |= mapping_gfp_mask(mapping); diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index b685b04dbf9d..d7437ef5c695 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -378,7 +378,7 @@ EXPORT_SYMBOL_GPL(roccat_disconnect); static long roccat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct roccat_device *device; unsigned int minor = iminor(inode); long retval = 0; diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 413a73187d33..3ad07a53a229 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -108,7 +108,7 @@ out: * This function is to be called with the minors_lock mutex held */ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, size_t count, unsigned char report_type) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0; @@ -176,7 +176,7 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t * mutex held. */ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t count, unsigned char report_type) { - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0, len; @@ -340,7 +340,7 @@ unlock: static long hidraw_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); unsigned int minor = iminor(inode); long ret = 0; struct hidraw *dev; diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 5ec2261574ec..c3ccdea3d180 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -148,7 +148,7 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, return -ENOMEM; pr_debug("i2c-dev: i2c-%d reading %zu bytes.\n", - iminor(file->f_path.dentry->d_inode), count); + iminor(file_inode(file)), count); ret = i2c_master_recv(client, tmp, count); if (ret >= 0) @@ -172,7 +172,7 @@ static ssize_t i2cdev_write(struct file *file, const char __user *buf, return PTR_ERR(tmp); pr_debug("i2c-dev: i2c-%d writing %zu bytes.\n", - iminor(file->f_path.dentry->d_inode), count); + iminor(file_inode(file)), count); ret = i2c_master_send(client, tmp, count); kfree(tmp); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index a3133d7b2a0c..2abcc4790f12 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -333,7 +333,7 @@ static int ide_settings_proc_open(struct inode *inode, struct file *file) static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; + ide_drive_t *drive = (ide_drive_t *) PDE(file_inode(file))->data; char name[MAX_LEN + 1]; int for_real = 0, mul_factor, div_factor; unsigned long n; @@ -558,7 +558,7 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; + ide_drive_t *drive = (ide_drive_t *) PDE(file_inode(file))->data; char name[32]; if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 0cb0007724a2..792e7e9b376f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -730,7 +730,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_tree_mutex_unlock; } - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); xrcd = find_xrcd(file->device, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 3eb7e454849b..aed8afee56da 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1864,9 +1864,9 @@ static int ipath_assign_port(struct file *fp, goto done_chk_sdma; } - i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); + (long)file_inode(fp)->i_rdev, i_minor); if (i_minor) ret = find_free_port(i_minor - 1, fp, uinfo); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index a4de9d58e9b4..a479375a8fd8 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -113,7 +113,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, struct infinipath_counters counters; struct ipath_devdata *dd; - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; dd->ipath_f_read_counters(dd, &counters); return simple_read_from_buffer(buf, count, ppos, &counters, @@ -154,7 +154,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -207,7 +207,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 959a5c4ff812..4f7aa301b3b1 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1524,7 +1524,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) } } - i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); else diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 65a2a23f6f8a..644bd6f6467c 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -45,7 +45,7 @@ static struct super_block *qib_super; -#define private2dd(file) ((file)->f_dentry->d_inode->i_private) +#define private2dd(file) (file_inode(file)->i_private) static int qibfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, const struct file_operations *fops, @@ -171,7 +171,7 @@ static const struct file_operations cntr_ops[] = { }; /* - * Could use file->f_dentry->d_inode->i_ino to figure out which file, + * Could use file_inode(file)->i_ino to figure out which file, * instead of separate routine for each, but for now, this works... */ diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index fc178893789a..7db150ca163e 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -964,7 +964,6 @@ static ssize_t smmu_debugfs_stats_write(struct file *file, { struct smmu_debugfs_info *info; struct smmu_device *smmu; - struct dentry *dent; int i; enum { _OFF = 0, @@ -992,8 +991,7 @@ static ssize_t smmu_debugfs_stats_write(struct file *file, if (i == ARRAY_SIZE(command)) return -EINVAL; - dent = file->f_dentry; - info = dent->d_inode->i_private; + info = file_inode(file)->i_private; smmu = info->smmu; offs = SMMU_CACHE_CONFIG(info->cache); diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index af4fd3d036c1..3a4165c61196 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -145,7 +145,7 @@ void remove_divas_proc(void) static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; + diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -172,7 +172,7 @@ static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer, static ssize_t d_l1_down_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; + diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -251,7 +251,7 @@ static const struct file_operations grp_opt_proc_fops = { static ssize_t info_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; + diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; char c[4]; diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index 88e4f0ee073c..9a3ce93665c5 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -173,7 +173,7 @@ hysdn_log_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct log_data *inf; int len; - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); struct procdata *pd = NULL; hysdn_card *card; @@ -319,7 +319,7 @@ static unsigned int hysdn_log_poll(struct file *file, poll_table *wait) { unsigned int mask = 0; - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); hysdn_card *card; struct procdata *pd = NULL; diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index e2a945ee9f05..32049ed2f24c 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1058,7 +1058,7 @@ isdn_info_update(void) static ssize_t isdn_read(struct file *file, char __user *buf, size_t count, loff_t *off) { - uint minor = iminor(file->f_path.dentry->d_inode); + uint minor = iminor(file_inode(file)); int len = 0; int drvidx; int chidx; @@ -1165,7 +1165,7 @@ out: static ssize_t isdn_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - uint minor = iminor(file->f_path.dentry->d_inode); + uint minor = iminor(file_inode(file)); int drvidx; int chidx; int retval; @@ -1228,7 +1228,7 @@ static unsigned int isdn_poll(struct file *file, poll_table *wait) { unsigned int mask = 0; - unsigned int minor = iminor(file->f_path.dentry->d_inode); + unsigned int minor = iminor(file_inode(file)); int drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL); mutex_lock(&isdn_mutex); @@ -1269,7 +1269,7 @@ out: static int isdn_ioctl(struct file *file, uint cmd, ulong arg) { - uint minor = iminor(file->f_path.dentry->d_inode); + uint minor = iminor(file_inode(file)); isdn_ctrl c; int drvidx; int ret; diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 61d78fa03b1a..38ceac5053a0 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -668,7 +668,7 @@ isdn_ppp_poll(struct file *file, poll_table *wait) if (is->debug & 0x2) printk(KERN_DEBUG "isdn_ppp_poll: minor: %d\n", - iminor(file->f_path.dentry->d_inode)); + iminor(file_inode(file))); /* just registers wait_queue hook. This doesn't really wait. */ poll_wait(file, &is->wq, wait); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7155945f8eb8..4fd9d6aeff6a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -337,7 +337,7 @@ static int read_page(struct file *file, unsigned long index, struct page *page) { int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct buffer_head *bh; sector_t block; @@ -755,7 +755,7 @@ static void bitmap_file_unmap(struct bitmap_storage *store) free_buffers(sb_page); if (file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); invalidate_mapping_pages(inode->i_mapping, 0, -1); fput(file); } diff --git a/drivers/media/pci/zoran/zoran_procfs.c b/drivers/media/pci/zoran/zoran_procfs.c index f1423b777db1..e084b0a21b1b 100644 --- a/drivers/media/pci/zoran/zoran_procfs.c +++ b/drivers/media/pci/zoran/zoran_procfs.c @@ -137,7 +137,7 @@ static int zoran_open(struct inode *inode, struct file *file) static ssize_t zoran_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct zoran *zr = PDE(file->f_path.dentry->d_inode)->data; + struct zoran *zr = PDE(file_inode(file))->data; char *string, *sp; char *line, *ldelim, *varname, *svar, *tdelim; diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index ca12d3289bfe..35002367485c 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -531,7 +531,7 @@ EXPORT_SYMBOL(lirc_dev_fop_close); unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait) { - struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)]; + struct irctl *ir = irctls[iminor(file_inode(file))]; unsigned int ret; if (!ir) { @@ -565,7 +565,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { __u32 mode; int result = 0; - struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)]; + struct irctl *ir = irctls[iminor(file_inode(file))]; if (!ir) { printk(KERN_ERR "lirc_dev: %s: no irctl found!\n", __func__); @@ -650,7 +650,7 @@ ssize_t lirc_dev_fop_read(struct file *file, size_t length, loff_t *ppos) { - struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)]; + struct irctl *ir = irctls[iminor(file_inode(file))]; unsigned char *buf; int ret = 0, written = 0; DECLARE_WAITQUEUE(wait, current); @@ -754,10 +754,10 @@ void *lirc_get_pdata(struct file *file) { void *data = NULL; - if (file && file->f_dentry && file->f_dentry->d_inode && - file->f_dentry->d_inode->i_rdev) { + if (file && file->f_dentry && file_inode(file) && + file_inode(file)->i_rdev) { struct irctl *ir; - ir = irctls[iminor(file->f_dentry->d_inode)]; + ir = irctls[iminor(file_inode(file))]; data = ir->d.data; } @@ -769,7 +769,7 @@ EXPORT_SYMBOL(lirc_get_pdata); ssize_t lirc_dev_fop_write(struct file *file, const char __user *buffer, size_t length, loff_t *ppos) { - struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)]; + struct irctl *ir = irctls[iminor(file_inode(file))]; if (!ir) { printk(KERN_ERR "%s: called with invalid irctl\n", __func__); diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 98dcad9c8a3b..870de1d5667a 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -222,7 +222,7 @@ static struct class video_class = { struct video_device *video_devdata(struct file *file) { - return video_device[iminor(file->f_path.dentry->d_inode)]; + return video_device[iminor(file_inode(file))]; } EXPORT_SYMBOL(video_devdata); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index dfcc65b33e99..4f02848bb2bc 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -194,7 +194,7 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, { struct ubi_volume_desc *desc = file->private_data; struct ubi_device *ubi = desc->vol->ubi; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int err; mutex_lock(&inode->i_mutex); err = ubi_sync(ubi->ubi_num); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f0718e1a8369..8fb1ccfe74dc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2336,7 +2336,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { loff_t pos = *ppos; - loff_t avail = file->f_path.dentry->d_inode->i_size; + loff_t avail = file_inode(file)->i_size; unsigned int mem = (uintptr_t)file->private_data & 3; struct adapter *adap = file->private_data - mem; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 6aed238e573e..1505509728aa 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -939,14 +939,14 @@ static int cosa_open(struct inode *inode, struct file *file) int ret = 0; mutex_lock(&cosa_chardev_mutex); - if ((n=iminor(file->f_path.dentry->d_inode)>>CARD_MINOR_BITS) + if ((n=iminor(file_inode(file))>>CARD_MINOR_BITS) >= nr_cards) { ret = -ENODEV; goto out; } cosa = cosa_cards+n; - if ((n=iminor(file->f_path.dentry->d_inode) + if ((n=iminor(file_inode(file)) & ((1<= cosa->nchannels) { ret = -ENODEV; goto out; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 598ca1cafb95..5311ba1dcd2c 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2769,7 +2769,7 @@ static ssize_t int_proc_write(struct file *file, const char __user *buffer, nr = nr * 10 + c; p++; } while (--len); - *(int *)PDE(file->f_path.dentry->d_inode)->data = nr; + *(int *)PDE(file_inode(file))->data = nr; return count; } diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index f2f501e5b6a0..d4d800c54d86 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -179,7 +179,7 @@ static int led_proc_open(struct inode *inode, struct file *file) static ssize_t led_proc_write(struct file *file, const char *buf, size_t count, loff_t *pos) { - void *data = PDE(file->f_path.dentry->d_inode)->data; + void *data = PDE(file_inode(file))->data; char *cur, lbuf[32]; int d; diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 9b8505ccc56d..0b009470e6db 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -21,7 +21,7 @@ static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence) { loff_t new = -1; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); mutex_lock(&inode->i_mutex); switch (whence) { @@ -46,7 +46,7 @@ proc_bus_pci_lseek(struct file *file, loff_t off, int whence) static ssize_t proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - const struct inode *ino = file->f_path.dentry->d_inode; + const struct inode *ino = file_inode(file); const struct proc_dir_entry *dp = PDE(ino); struct pci_dev *dev = dp->data; unsigned int pos = *ppos; @@ -132,7 +132,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos) { - struct inode *ino = file->f_path.dentry->d_inode; + struct inode *ino = file_inode(file); const struct proc_dir_entry *dp = PDE(ino); struct pci_dev *dev = dp->data; int pos = *ppos; @@ -212,7 +212,7 @@ struct pci_filp_private { static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - const struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + const struct proc_dir_entry *dp = PDE(file_inode(file)); struct pci_dev *dev = dp->data; #ifdef HAVE_PCI_MMAP struct pci_filp_private *fpriv = file->private_data; @@ -253,7 +253,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, #ifdef HAVE_PCI_MMAP static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); const struct proc_dir_entry *dp = PDE(inode); struct pci_dev *dev = dp->data; struct pci_filp_private *fpriv = file->private_data; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index b8ad71f7863f..8853d013a716 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -3566,7 +3566,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, } if (ret > 0) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); inode->i_atime = current_fs_time(inode->i_sb); } diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index f946ca7cb762..05dfd569d912 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -852,7 +852,7 @@ static ssize_t dispatch_proc_write(struct file *file, const char __user *userbuf, size_t count, loff_t *pos) { - struct ibm_struct *ibm = PDE(file->f_path.dentry->d_inode)->data; + struct ibm_struct *ibm = PDE(file_inode(file))->data; char *kernbuf; int ret; diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c2727895794c..6fba80ad3bca 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -583,7 +583,7 @@ static int set_lcd_status(struct backlight_device *bd) static ssize_t lcd_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data; + struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data; char cmd[42]; size_t len; int value; @@ -650,7 +650,7 @@ static int video_proc_open(struct inode *inode, struct file *file) static ssize_t video_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data; + struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data; char *cmd, *buffer; int ret; int value; @@ -750,7 +750,7 @@ static int fan_proc_open(struct inode *inode, struct file *file) static ssize_t fan_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data; + struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data; char cmd[42]; size_t len; int value; @@ -822,7 +822,7 @@ static int keys_proc_open(struct inode *inode, struct file *file) static ssize_t keys_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data; + struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data; char cmd[42]; size_t len; int value; diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 315b3112aca8..65f735ac6b3b 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -30,7 +30,7 @@ static struct proc_dir_entry *isapnp_proc_bus_dir = NULL; static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence) { loff_t new = -1; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); mutex_lock(&inode->i_mutex); switch (whence) { @@ -55,7 +55,7 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence) static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf, size_t nbytes, loff_t * ppos) { - struct inode *ino = file->f_path.dentry->d_inode; + struct inode *ino = file_inode(file); struct proc_dir_entry *dp = PDE(ino); struct pnp_dev *dev = dp->data; int pos = *ppos; diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index bc89f392a629..63ddb0173456 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -244,7 +244,7 @@ static int pnpbios_proc_open(struct inode *inode, struct file *file) static ssize_t pnpbios_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - void *data = PDE(file->f_path.dentry->d_inode)->data; + void *data = PDE(file_inode(file))->data; struct pnp_bios_node *node; int boot = (long)data >> 8; u8 nodenum = (long)data; diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 911704571b9c..5acdc5f7dae8 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -433,9 +433,9 @@ fs3270_open(struct inode *inode, struct file *filp) struct idal_buffer *ib; int minor, rc = 0; - if (imajor(filp->f_path.dentry->d_inode) != IBM_FS3270_MAJOR) + if (imajor(file_inode(filp)) != IBM_FS3270_MAJOR) return -ENODEV; - minor = iminor(filp->f_path.dentry->d_inode); + minor = iminor(file_inode(filp)); /* Check for minor 0 multiplexer. */ if (minor == 0) { struct tty_struct *tty = get_current_tty(); diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 2d61db3fc62a..6dc60725de92 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -273,13 +273,13 @@ tapechar_open (struct inode *inode, struct file *filp) int minor, rc; DBF_EVENT(6, "TCHAR:open: %i:%i\n", - imajor(filp->f_path.dentry->d_inode), - iminor(filp->f_path.dentry->d_inode)); + imajor(file_inode(filp)), + iminor(file_inode(filp))); - if (imajor(filp->f_path.dentry->d_inode) != tapechar_major) + if (imajor(file_inode(filp)) != tapechar_major) return -ENODEV; - minor = iminor(filp->f_path.dentry->d_inode); + minor = iminor(file_inode(filp)); device = tape_find_device(minor / TAPE_MINORS_PER_DEV); if (IS_ERR(device)) { DBF_EVENT(3, "TCHAR:open: tape_find_device() failed\n"); diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index 483f72ba030d..c180e3135b3b 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -703,7 +703,7 @@ static int ur_open(struct inode *inode, struct file *file) * We treat the minor number as the devno of the ur device * to find in the driver tree. */ - devno = MINOR(file->f_dentry->d_inode->i_rdev); + devno = MINOR(file_inode(file)->i_rdev); urd = urdev_get_from_devno(devno); if (!urd) { diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index e6e0d31c02ac..749b72739c4a 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -128,7 +128,7 @@ static int qstat_show(struct seq_file *m, void *v) static int qstat_seq_open(struct inode *inode, struct file *filp) { return single_open(filp, qstat_show, - filp->f_path.dentry->d_inode->i_private); + file_inode(filp)->i_private); } static const struct file_operations debugfs_fops = { @@ -221,7 +221,7 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf, static int qperf_seq_open(struct inode *inode, struct file *filp) { return single_open(filp, qperf_show, - filp->f_path.dentry->d_inode->i_private); + file_inode(filp)->i_private); } static struct file_operations debugfs_perf_fops = { diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index e85c803b30cd..fc1339cf91ac 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -107,7 +107,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg) int error = 0; u8 ireg = 0; - if (D7S_MINOR != iminor(file->f_path.dentry->d_inode)) + if (D7S_MINOR != iminor(file_inode(file))) return -ENODEV; mutex_lock(&d7s_mutex); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index d1f0120cdb98..5e1e12c0cf42 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -640,7 +640,7 @@ out: /* This function handles ioctl for the character device */ static long twa_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); long timeout; unsigned long *cpu_addr, data_buffer_length_adjusted = 0, flags = 0; dma_addr_t dma_handle; diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 52a2f0580d97..c845bdbeb6c0 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -757,7 +757,7 @@ static long twl_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long dma_addr_t dma_handle; int request_id = 0; TW_Ioctl_Driver_Command driver_command; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); TW_Ioctl_Buf_Apache *tw_ioctl; TW_Command_Full *full_command_packet; TW_Device_Extension *tw_dev = twl_device_extension_list[iminor(inode)]; diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index 62071d2fc1ce..56662ae03dea 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -889,7 +889,7 @@ static long tw_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long a unsigned long flags; unsigned int data_buffer_length = 0; unsigned long data_buffer_length_adjusted = 0; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); unsigned long *cpu_addr; long timeout; TW_New_Ioctl *tw_ioctl; diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index b42cbbd3d92d..c323b2030afa 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -71,7 +71,7 @@ static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { loff_t pos = *ppos; - loff_t avail = file->f_path.dentry->d_inode->i_size; + loff_t avail = file_inode(file)->i_size; unsigned int mem = (uintptr_t)file->private_data & 3; struct csio_hw *hw = file->private_data - mem; diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index b4f6c9a84e71..b6e2700ec1c6 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -2161,7 +2161,7 @@ static long adpt_unlocked_ioctl(struct file *file, uint cmd, ulong arg) struct inode *inode; long ret; - inode = file->f_dentry->d_inode; + inode = file_inode(file); mutex_lock(&adpt_mutex); ret = adpt_ioctl(inode, file, cmd, arg); @@ -2177,7 +2177,7 @@ static long compat_adpt_ioctl(struct file *file, struct inode *inode; long ret; - inode = file->f_dentry->d_inode; + inode = file_inode(file); mutex_lock(&adpt_mutex); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 98156a97c472..3e2b3717cb5c 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -977,7 +977,7 @@ static int check_tape(struct scsi_tape *STp, struct file *filp) struct st_modedef *STm; struct st_partstat *STps; char *name = tape_name(STp); - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int mode = TAPE_MODE(inode); STp->ready = ST_READY; diff --git a/drivers/staging/bcm/Misc.c b/drivers/staging/bcm/Misc.c index c92078e7fe86..5d5d95de081e 100644 --- a/drivers/staging/bcm/Misc.c +++ b/drivers/staging/bcm/Misc.c @@ -185,7 +185,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Unable to Open %s\n", path); return -ENOENT; } - BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)flp->f_dentry->d_inode->i_size, loc); + BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)file_inode(flp)->i_size, loc); do_gettimeofday(&tv); BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "download start %lx", ((tv.tv_sec * 1000) + (tv.tv_usec / 1000))); diff --git a/drivers/staging/ccg/f_mass_storage.c b/drivers/staging/ccg/f_mass_storage.c index 4f1142efa6d1..20bc2b454ac2 100644 --- a/drivers/staging/ccg/f_mass_storage.c +++ b/drivers/staging/ccg/f_mass_storage.c @@ -998,7 +998,7 @@ static int do_synchronize_cache(struct fsg_common *common) static void invalidate_sub(struct fsg_lun *curlun) { struct file *filp = curlun->filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); diff --git a/drivers/staging/ccg/rndis.c b/drivers/staging/ccg/rndis.c index e4192b887de9..d9297eebbf73 100644 --- a/drivers/staging/ccg/rndis.c +++ b/drivers/staging/ccg/rndis.c @@ -1065,7 +1065,7 @@ static int rndis_proc_show(struct seq_file *m, void *v) static ssize_t rndis_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - rndis_params *p = PDE(file->f_path.dentry->d_inode)->data; + rndis_params *p = PDE(file_inode(file))->data; u32 speed = 0; int i, fl_speed = 0; diff --git a/drivers/staging/ccg/storage_common.c b/drivers/staging/ccg/storage_common.c index 8d9bcd8207c8..abb01ac74cec 100644 --- a/drivers/staging/ccg/storage_common.c +++ b/drivers/staging/ccg/storage_common.c @@ -656,7 +656,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_WRITE)) ro = 1; - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; diff --git a/drivers/staging/dgrp/dgrp_specproc.c b/drivers/staging/dgrp/dgrp_specproc.c index c214078a89e9..b2bda32254ac 100644 --- a/drivers/staging/dgrp/dgrp_specproc.c +++ b/drivers/staging/dgrp/dgrp_specproc.c @@ -354,7 +354,7 @@ static int dgrp_gen_proc_open(struct inode *inode, struct file *file) struct dgrp_proc_entry *entry; int ret = 0; - de = (struct proc_dir_entry *) PDE(file->f_dentry->d_inode); + de = (struct proc_dir_entry *) PDE(file_inode(file)); if (!de || !de->data) { ret = -ENXIO; goto done; @@ -384,7 +384,7 @@ static int dgrp_gen_proc_close(struct inode *inode, struct file *file) struct proc_dir_entry *de; struct dgrp_proc_entry *entry; - de = (struct proc_dir_entry *) PDE(file->f_dentry->d_inode); + de = (struct proc_dir_entry *) PDE(file_inode(file)); if (!de || !de->data) goto done; diff --git a/drivers/staging/omapdrm/omap_gem_helpers.c b/drivers/staging/omapdrm/omap_gem_helpers.c index ffb8cceaeb46..7d1b64a7404b 100644 --- a/drivers/staging/omapdrm/omap_gem_helpers.c +++ b/drivers/staging/omapdrm/omap_gem_helpers.c @@ -40,7 +40,7 @@ struct page **_drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask) int i, npages; /* This is the shared memory object that backs the GEM resource */ - inode = obj->filp->f_path.dentry->d_inode; + inode = file_inode(obj->filp); mapping = inode->i_mapping; npages = obj->size >> PAGE_SHIFT; diff --git a/drivers/staging/usbip/usbip_common.c b/drivers/staging/usbip/usbip_common.c index 75189feac380..773014c7c752 100644 --- a/drivers/staging/usbip/usbip_common.c +++ b/drivers/staging/usbip/usbip_common.c @@ -411,7 +411,7 @@ struct socket *sockfd_to_socket(unsigned int sockfd) return NULL; } - inode = file->f_dentry->d_inode; + inode = file_inode(file); if (!inode || !S_ISSOCK(inode->i_mode)) { fput(file); diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c index 4ef852c4c4e1..869ce93ee204 100644 --- a/drivers/staging/vme/devices/vme_user.c +++ b/drivers/staging/vme/devices/vme_user.c @@ -318,7 +318,7 @@ static ssize_t buffer_from_user(unsigned int minor, const char __user *buf, static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); + unsigned int minor = MINOR(file_inode(file)->i_rdev); ssize_t retval; size_t image_size; size_t okcount; @@ -364,7 +364,7 @@ static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count, static ssize_t vme_user_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); + unsigned int minor = MINOR(file_inode(file)->i_rdev); ssize_t retval; size_t image_size; size_t okcount; @@ -410,7 +410,7 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf, static loff_t vme_user_llseek(struct file *file, loff_t off, int whence) { loff_t absolute = -1; - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); + unsigned int minor = MINOR(file_inode(file)->i_rdev); size_t image_size; if (minor == CONTROL_MINOR) @@ -583,7 +583,7 @@ vme_user_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) int ret; mutex_lock(&vme_user_mutex); - ret = vme_user_ioctl(file->f_path.dentry->d_inode, file, cmd, arg); + ret = vme_user_ioctl(file_inode(file), file, cmd, arg); mutex_unlock(&vme_user_mutex); return ret; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b9c88497e8f0..a65d507b0b0c 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -265,7 +265,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, * the expected virt_size for struct file w/o a backing struct * block_device. */ - if (S_ISBLK(fd->f_dentry->d_inode->i_mode)) { + if (S_ISBLK(file_inode(fd)->i_mode)) { if (ret < 0 || ret != cmd->data_length) { pr_err("%s() returned %d, expecting %u for " "S_ISBLK\n", __func__, ret, diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index fa7268a93c06..e4ca345873c3 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -101,7 +101,7 @@ vcs_poll_data_get(struct file *file) poll = kzalloc(sizeof(*poll), GFP_KERNEL); if (!poll) return NULL; - poll->cons_num = iminor(file->f_path.dentry->d_inode) & 127; + poll->cons_num = iminor(file_inode(file)) & 127; init_waitqueue_head(&poll->waitq); poll->notifier.notifier_call = vcs_notifier; if (register_vt_notifier(&poll->notifier) != 0) { @@ -182,7 +182,7 @@ static loff_t vcs_lseek(struct file *file, loff_t offset, int orig) int size; console_lock(); - size = vcs_size(file->f_path.dentry->d_inode); + size = vcs_size(file_inode(file)); console_unlock(); if (size < 0) return size; @@ -208,7 +208,7 @@ static loff_t vcs_lseek(struct file *file, loff_t offset, int orig) static ssize_t vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); unsigned int currcons = iminor(inode); struct vc_data *vc; struct vcs_poll_data *poll; @@ -386,7 +386,7 @@ unlock_out: static ssize_t vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); unsigned int currcons = iminor(inode); struct vc_data *vc; long pos; diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index cbacea933b18..2c42e06f9717 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -658,7 +658,7 @@ static loff_t usb_device_lseek(struct file *file, loff_t offset, int orig) { loff_t ret; - mutex_lock(&file->f_dentry->d_inode->i_mutex); + mutex_lock(&file_inode(file)->i_mutex); switch (orig) { case 0: @@ -674,7 +674,7 @@ static loff_t usb_device_lseek(struct file *file, loff_t offset, int orig) ret = -EINVAL; } - mutex_unlock(&file->f_dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(file)->i_mutex); return ret; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b78fbe222b72..6e8af6ddb5f7 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -160,7 +160,7 @@ static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig) { loff_t ret; - mutex_lock(&file->f_dentry->d_inode->i_mutex); + mutex_lock(&file_inode(file)->i_mutex); switch (orig) { case 0: @@ -176,7 +176,7 @@ static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig) ret = -EINVAL; } - mutex_unlock(&file->f_dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(file)->i_mutex); return ret; } @@ -1970,7 +1970,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p) { struct dev_state *ps = file->private_data; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct usb_device *dev = ps->dev; int ret = -ENOTTY; diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index a7aed84d98c9..fd49dba53613 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -93,7 +93,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, nbytes)) return -EFAULT; - mutex_lock(&file->f_dentry->d_inode->i_mutex); + mutex_lock(&file_inode(file)->i_mutex); list_for_each_entry_safe(req, tmp_req, queue, queue) { len = snprintf(tmpbuf, sizeof(tmpbuf), "%8p %08x %c%c%c %5d %c%c%c\n", @@ -120,7 +120,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf, nbytes -= len; buf += len; } - mutex_unlock(&file->f_dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(file)->i_mutex); return actual; } @@ -168,13 +168,13 @@ out: static ssize_t regs_dbg_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); int ret; mutex_lock(&inode->i_mutex); ret = simple_read_from_buffer(buf, nbytes, ppos, file->private_data, - file->f_dentry->d_inode->i_size); + file_inode(file)->i_size); mutex_unlock(&inode->i_mutex); return ret; diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c index 5d027b3e1ef0..50a46a429efb 100644 --- a/drivers/usb/gadget/f_mass_storage.c +++ b/drivers/usb/gadget/f_mass_storage.c @@ -992,7 +992,7 @@ static int do_synchronize_cache(struct fsg_common *common) static void invalidate_sub(struct fsg_lun *curlun) { struct file *filp = curlun->filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 35bcc83d1e04..bf7a56b6d48a 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -688,7 +688,7 @@ static int printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync) { struct printer_dev *dev = fd->private_data; - struct inode *inode = fd->f_path.dentry->d_inode; + struct inode *inode = file_inode(fd); unsigned long flags; int tx_list_empty; diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index e4192b887de9..d9297eebbf73 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -1065,7 +1065,7 @@ static int rndis_proc_show(struct seq_file *m, void *v) static ssize_t rndis_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - rndis_params *p = PDE(file->f_path.dentry->d_inode)->data; + rndis_params *p = PDE(file_inode(file))->data; u32 speed = 0; int i, fl_speed = 0; diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c index 0e3ae43454a2..b5d3f0eeeb7d 100644 --- a/drivers/usb/gadget/storage_common.c +++ b/drivers/usb/gadget/storage_common.c @@ -501,7 +501,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_WRITE)) ro = 1; - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 88cad6b8b479..900aa4ecd617 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -69,7 +69,7 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma, int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fb_info *info = file->private_data; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int err = filemap_write_and_wait_range(inode->i_mapping, start, end); if (err) return err; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 3ff0105a496a..bf01980c9554 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -727,7 +727,7 @@ static const struct file_operations fb_proc_fops = { */ static struct fb_info *file_fb_info(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c index f2566c19e71c..113c7876c855 100644 --- a/drivers/video/msm/mdp.c +++ b/drivers/video/msm/mdp.c @@ -261,7 +261,7 @@ int get_img(struct mdp_img *img, struct fb_info *info, if (f.file == NULL) return -1; - if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { + if (MAJOR(file_inode(f.file)->i_rdev) == FB_MAJOR) { *start = info->fix.smem_start; *len = info->fix.smem_len; } else diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index 11d55ce5ca81..70387582843f 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -411,7 +411,7 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) .identity = DRIVER_NAME, }; void __user *argp = (void __user *)arg; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int index = iminor(inode) - WD0_MINOR; struct cpwd *p = cpwd_device; int setopt = 0; @@ -499,7 +499,7 @@ static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, static ssize_t cpwd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct cpwd *p = cpwd_device; int index = iminor(inode); diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index 988880dcee75..73b33837e12c 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -22,7 +22,7 @@ static loff_t proc_bus_zorro_lseek(struct file *file, loff_t off, int whence) { loff_t new = -1; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); mutex_lock(&inode->i_mutex); switch (whence) { @@ -47,7 +47,7 @@ proc_bus_zorro_lseek(struct file *file, loff_t off, int whence) static ssize_t proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct inode *ino = file->f_path.dentry->d_inode; + struct inode *ino = file_inode(file); struct proc_dir_entry *dp = PDE(ino); struct zorro_dev *z = dp->data; struct ConfigDev cd; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index c2483e97beee..3356e3ed5115 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -133,7 +133,7 @@ out_error: static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) { int res = 0; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); @@ -302,7 +302,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int ret = -ENOLCK; p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", @@ -338,7 +338,7 @@ out_err: static int v9fs_file_flock_dotl(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int ret = -ENOLCK; p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", @@ -529,7 +529,7 @@ v9fs_file_write(struct file *filp, const char __user * data, if (!count) goto out; - retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode, + retval = v9fs_file_write_internal(file_inode(filp), filp->private_data, data, count, &origin, 1); /* update offset on successful write */ @@ -604,7 +604,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct v9fs_inode *v9inode; struct page *page = vmf->page; struct file *filp = vma->vm_file; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index b3be2e7c5643..9cf874ce8336 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -19,7 +19,7 @@ static DEFINE_RWLOCK(adfs_dir_lock); static int adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; struct object_info obj; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 8ca8f3a55599..fd11a6d608ee 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -42,7 +42,7 @@ const struct inode_operations affs_dir_inode_operations = { static int affs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct buffer_head *dir_bh; struct buffer_head *fh_bh; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index db477906ba4f..7a465ed04444 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -393,12 +393,12 @@ static int afs_readdir(struct file *file, void *cookie, filldir_t filldir) int ret; _enter("{%Ld,{%lu}}", - file->f_pos, file->f_path.dentry->d_inode->i_ino); + file->f_pos, file_inode(file)->i_ino); ASSERT(file->private_data != NULL); fpos = file->f_pos; - ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, + ret = afs_dir_iterate(file_inode(file), &fpos, cookie, filldir, file->private_data); file->f_pos = fpos; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 757d664575dd..2497bf306c70 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -514,7 +514,7 @@ error: */ int afs_lock(struct file *file, int cmd, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", vnode->fid.vid, vnode->fid.vnode, cmd, @@ -537,7 +537,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) */ int afs_flock(struct file *file, int cmd, struct file_lock *fl) { - struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); _enter("{%x:%u},%d,{t=%x,fl=%x}", vnode->fid.vid, vnode->fid.vnode, cmd, diff --git a/fs/afs/write.c b/fs/afs/write.c index 9aa52d93c73c..7e03eadb40c0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -120,7 +120,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct afs_writeback *candidate, *wb; - struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct page *page; struct key *key = file->private_data; unsigned from = pos & (PAGE_CACHE_SIZE - 1); @@ -245,7 +245,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); loff_t i_size, maybe_i_size; _enter("{%x:%u},{%lx}", @@ -627,8 +627,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct dentry *dentry = iocb->ki_filp->f_path.dentry; - struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); ssize_t result; size_t count = iov_length(iov, nr_segs); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index b785e7707959..3f1128b37e46 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -273,7 +273,7 @@ static inline int autofs_prepare_pipe(struct file *pipe) { if (!pipe->f_op || !pipe->f_op->write) return -EINVAL; - if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode)) + if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; /* We want a packet pipe */ pipe->f_flags |= O_DIRECT; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9f68a37bb2b2..743c7c2c949d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -159,7 +159,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) struct inode *inode; if (f) { - inode = f->f_path.dentry->d_inode; + inode = file_inode(f); sbi = autofs4_sbi(inode->i_sb); } return sbi; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c93447604da8..3cdf835e8b49 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -874,7 +874,7 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, static long autofs4_root_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); } @@ -882,7 +882,7 @@ static long autofs4_root_ioctl(struct file *filp, static long autofs4_root_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int ret; if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 2b3bda8d5e68..c8f4e25eb9e2 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -213,7 +213,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) static int befs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_off_t value; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 2785ef91191a..3f422f6bb5ca 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -28,7 +28,7 @@ static struct buffer_head *bfs_find_entry(struct inode *dir, static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) { - struct inode *dir = f->f_path.dentry->d_inode; + struct inode *dir = file_inode(f); struct buffer_head *bh; struct bfs_dirent *de; struct bfs_sb_info *info = BFS_SB(dir->i_sb); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 6043567b95c2..bbc8f8827eac 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -214,7 +214,7 @@ static int load_aout_binary(struct linux_binprm * bprm) if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -367,7 +367,7 @@ static int load_aout_library(struct file *file) int retval; struct exec ex; - inode = file->f_path.dentry->d_inode; + inode = file_inode(file); retval = -ENOEXEC; error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0c42cdbabecf..11e078a747a5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1140,7 +1140,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { - if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? + if (file_inode(vma->vm_file)->i_nlink == 0 ? FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) goto whole; return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index dc84732e554f..30de01ca3eeb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -909,7 +909,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, dynamic_error: printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", - what, file->f_path.dentry->d_inode->i_ino); + what, file_inode(file)->i_ino); return -ELIBBAD; } @@ -1219,7 +1219,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { - if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) { + if (file_inode(vma->vm_file)->i_nlink == 0) { dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags); kdcore("%08lx: %08lx: %s (share)", vma->vm_start, vma->vm_flags, dump_ok ? "yes" : "no"); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index b56371981d16..2036d21baaef 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -438,7 +438,7 @@ static int load_flat_file(struct linux_binprm * bprm, int ret; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ - inode = bprm->file->f_path.dentry->d_inode; + inode = file_inode(bprm->file); text_len = ntohl(hdr->data_start); data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 0c8869fdd14e..fecbbf3f8ff2 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -531,7 +531,7 @@ static void kill_node(Node *e) static ssize_t bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) { - Node *e = file->f_path.dentry->d_inode->i_private; + Node *e = file_inode(file)->i_private; ssize_t res; char *page; @@ -550,7 +550,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct dentry *root; - Node *e = file->f_path.dentry->d_inode->i_private; + Node *e = file_inode(file)->i_private; int res = parse_command(buffer, count); switch (res) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 172f8491a2bd..7d6bdfc6b7bc 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -318,7 +318,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, /* * private llseek: - * for a block special file file->f_path.dentry->d_inode->i_size is zero + * for a block special file file_inode(file)->i_size is zero * so we compute the size by hand (just as in block_read/write above) */ static loff_t block_llseek(struct file *file, loff_t offset, int whence) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77061bf43edb..4118e0b6e339 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1211,7 +1211,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct extent_state *cached_state = NULL; int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int err = 0; int faili = 0; @@ -1298,7 +1298,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct iov_iter *i, loff_t pos) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; unsigned long first_index; @@ -1486,7 +1486,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; loff_t *ppos = &iocb->ki_pos; u64 start_pos; @@ -2087,7 +2087,7 @@ out: static long btrfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct extent_state *cached_state = NULL; u64 cur_offset; u64 last_byte; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 16d9e8e191e6..02d946a61ddd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4342,7 +4342,7 @@ unsigned char btrfs_filetype_table[] = { static int btrfs_real_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; @@ -6737,7 +6737,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = fdentry(vma->vm_file)->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4b4516770f05..61045adc3075 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -152,7 +152,7 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) static int btrfs_ioctl_getflags(struct file *file, void __user *arg) { - struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); + struct btrfs_inode *ip = BTRFS_I(file_inode(file)); unsigned int flags = btrfs_flags_to_ioctl(ip->flags); if (copy_to_user(arg, &flags, sizeof(flags))) @@ -177,7 +177,7 @@ static int check_flags(unsigned int flags) static int btrfs_ioctl_setflags(struct file *file, void __user *arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct btrfs_inode *ip = BTRFS_I(inode); struct btrfs_root *root = ip->root; struct btrfs_trans_handle *trans; @@ -310,7 +310,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) static int btrfs_ioctl_getversion(struct file *file, int __user *arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); return put_user(inode->i_generation, arg); } @@ -1317,7 +1317,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, u64 new_size; u64 old_size; u64 devid = 1; - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_trans_handle *trans; struct btrfs_device *device = NULL; @@ -1483,8 +1483,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, goto out_drop_write; } - src_inode = src.file->f_path.dentry->d_inode; - if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { + src_inode = file_inode(src.file); + if (src_inode->i_sb != file_inode(file)->i_sb) { printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; @@ -1576,7 +1576,7 @@ out: static noinline int btrfs_ioctl_subvol_getflags(struct file *file, void __user *arg) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; u64 flags = 0; @@ -1598,7 +1598,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file, static noinline int btrfs_ioctl_subvol_setflags(struct file *file, void __user *arg) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; u64 root_flags; @@ -1892,7 +1892,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, if (IS_ERR(args)) return PTR_ERR(args); - inode = fdentry(file)->d_inode; + inode = file_inode(file); ret = search_ioctl(inode, args); if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; @@ -2002,7 +2002,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, if (IS_ERR(args)) return PTR_ERR(args); - inode = fdentry(file)->d_inode; + inode = file_inode(file); if (args->treeid == 0) args->treeid = BTRFS_I(inode)->root->root_key.objectid; @@ -2178,7 +2178,7 @@ out: static int btrfs_ioctl_defrag(struct file *file, void __user *argp) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_defrag_range_args *range; int ret; @@ -2237,7 +2237,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) /* the rest are all set to zero by kzalloc */ range->len = (u64)-1; } - ret = btrfs_defrag_file(fdentry(file)->d_inode, file, + ret = btrfs_defrag_file(file_inode(file), file, range, 0, 0); if (ret > 0) ret = 0; @@ -2285,7 +2285,7 @@ out: static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_vol_args *vol_args; int ret; @@ -2408,7 +2408,7 @@ out: static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct fd src_file; struct inode *src; @@ -2454,7 +2454,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (src_file.file->f_path.mnt != file->f_path.mnt) goto out_fput; - src = src_file.file->f_dentry->d_inode; + src = file_inode(src_file.file); ret = -EINVAL; if (src == inode) @@ -2816,7 +2816,7 @@ static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) */ static long btrfs_ioctl_trans_start(struct file *file) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret; @@ -2856,7 +2856,7 @@ out: static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *new_root; struct btrfs_dir_item *di; @@ -3080,7 +3080,7 @@ out: */ long btrfs_ioctl_trans_end(struct file *file) { - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3142,7 +3142,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, static long btrfs_ioctl_scrub(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_scrub_args *sa; int ret; @@ -3433,7 +3433,7 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, static long btrfs_ioctl_balance(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ioctl_balance_args *bargs; struct btrfs_balance_control *bctl; @@ -3573,7 +3573,7 @@ out: static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_quota_ctl_args *sa; struct btrfs_trans_handle *trans = NULL; int ret; @@ -3632,7 +3632,7 @@ drop_write: static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_qgroup_assign_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3679,7 +3679,7 @@ drop_write: static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_qgroup_create_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3725,7 +3725,7 @@ drop_write: static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_ioctl_qgroup_limit_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3775,7 +3775,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, void __user *arg) { struct btrfs_ioctl_received_subvol_args *sa = NULL; - struct inode *inode = fdentry(file)->d_inode; + struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; @@ -3855,7 +3855,7 @@ out: long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_root *root = BTRFS_I(file_inode(file))->root; void __user *argp = (void __user *)arg; switch (cmd) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 54454542ad40..f80df6b04648 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4542,7 +4542,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; + send_root = BTRFS_I(file_inode(mnt_file))->root; fs_info = send_root->fs_info; arg = memdup_user(arg_, sizeof(*arg)); diff --git a/fs/buffer.c b/fs/buffer.c index 7a75c3e0fd58..b8a8b4d64d8c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2332,7 +2332,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); unsigned long end; loff_t size; int ret; @@ -2371,7 +2371,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { int ret; - struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; + struct super_block *sb = file_inode(vma->vm_file)->i_sb; sb_start_pagefault(sb); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 064d1a68d2c1..d4f81edd9a5d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -195,7 +195,7 @@ static int ceph_releasepage(struct page *page, gfp_t g) */ static int readpage_nounlock(struct file *filp, struct page *page) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; @@ -370,7 +370,7 @@ out: static int ceph_readpages(struct file *file, struct address_space *mapping, struct list_head *page_list, unsigned nr_pages) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); int rc = 0; int max = 0; @@ -977,7 +977,7 @@ static int ceph_update_writeable_page(struct file *file, loff_t pos, unsigned len, struct page *page) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; loff_t page_off = pos & PAGE_CACHE_MASK; @@ -1086,7 +1086,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = file->private_data; struct page *page; @@ -1144,7 +1144,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1228,7 +1228,7 @@ const struct address_space_operations ceph_aops = { */ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct page *page = vmf->page; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; loff_t off = page_offset(page); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8c1aabe93b67..6d797f46d772 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -238,7 +238,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name, static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct ceph_file_info *fi = filp->private_data; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1138,7 +1138,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct ceph_file_info *cf = file->private_data; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int left; const int bufsize = 1024; @@ -1188,7 +1188,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct list_head *head = &ci->i_unsafe_dirops; struct ceph_mds_request *req; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e51558fca3a3..11b57c2c8f15 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -393,7 +393,7 @@ more: static ssize_t ceph_sync_read(struct file *file, char __user *data, unsigned len, loff_t *poff, int *checkeof) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct page **pages; u64 off = *poff; int num_pages, ret; @@ -466,7 +466,7 @@ static void sync_write_commit(struct ceph_osd_request *req, static ssize_t ceph_sync_write(struct file *file, const char __user *data, size_t left, loff_t *offset) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; @@ -483,7 +483,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, int ret; struct timespec mtime = CURRENT_TIME; - if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP) + if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; dout("sync_write on file %p %lld~%u %s\n", file, *offset, @@ -637,7 +637,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, struct ceph_file_info *fi = filp->private_data; loff_t *ppos = &iocb->ki_pos; size_t len = iov->iov_len; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); void __user *base = iov->iov_base; ssize_t ret; @@ -707,7 +707,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct ceph_file_info *fi = file->private_data; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 36549a46e311..f5ed767806df 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -16,11 +16,11 @@ */ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) { - struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); + struct ceph_inode_info *ci = ceph_inode(file_inode(file)); struct ceph_ioctl_layout l; int err; - err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); + err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT); if (!err) { l.stripe_unit = ceph_file_layout_su(ci->i_layout); l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); @@ -63,12 +63,12 @@ static long __validate_layout(struct ceph_mds_client *mdsc, static long ceph_ioctl_set_layout(struct file *file, void __user *arg) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct inode *parent_inode; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; - struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); + struct ceph_inode_info *ci = ceph_inode(file_inode(file)); struct ceph_ioctl_layout nl; int err; @@ -76,7 +76,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) return -EFAULT; /* validate changed params against current layout */ - err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); + err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT); if (err) return err; @@ -136,7 +136,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) */ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_mds_request *req; struct ceph_ioctl_layout l; int err; @@ -179,7 +179,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) { struct ceph_ioctl_dataloc dl; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; @@ -234,7 +234,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) static long ceph_ioctl_lazyio(struct file *file) { struct ceph_file_info *fi = file->private_data; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 80576d05d687..202dd3d68be0 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -13,7 +13,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, int cmd, u8 wait, struct file_lock *fl) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index de7f9168a118..8b35365c70be 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -677,7 +677,7 @@ out_nls: static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t written; int rc; @@ -701,7 +701,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) */ if (whence != SEEK_SET && whence != SEEK_CUR) { int rc; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); /* * We need to be sure that all dirty pages are written and the @@ -733,7 +733,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) { /* note that this is called by vfs setlease with lock_flocks held to protect *lease from going away */ - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; if (!(S_ISREG(inode->i_mode))) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8ea6ca50a665..1a5c2911b043 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -947,7 +947,7 @@ static int cifs_posix_lock_test(struct file *file, struct file_lock *flock) { int rc = 0; - struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); unsigned char saved_type = flock->fl_type; if ((flock->fl_flags & FL_POSIX) == 0) @@ -974,7 +974,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock) static int cifs_posix_lock_set(struct file *file, struct file_lock *flock) { - struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); int rc = 1; if ((flock->fl_flags & FL_POSIX) == 0) @@ -1548,7 +1548,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); netfid = cfile->fid.netfid; - cinode = CIFS_I(file->f_path.dentry->d_inode); + cinode = CIFS_I(file_inode(file)); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && @@ -2171,7 +2171,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2246,7 +2246,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ int cifs_flush(struct file *file, fl_owner_t id) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int rc = 0; if (file->f_mode & FMODE_WRITE) @@ -2480,7 +2480,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, ssize_t written; struct inode *inode; - inode = iocb->ki_filp->f_path.dentry->d_inode; + inode = file_inode(iocb->ki_filp); /* * BB - optimize the way when signing is disabled. We can drop this @@ -2543,7 +2543,7 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = (struct cifsFileInfo *) @@ -2915,7 +2915,7 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = (struct cifsFileInfo *) @@ -3063,7 +3063,7 @@ static struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { int rc, xid; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); xid = get_xid(); @@ -3356,7 +3356,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, int rc; /* Is the page cached? */ - rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page); + rc = cifs_readpage_from_fscache(file_inode(file), page); if (rc == 0) goto read_complete; @@ -3371,8 +3371,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page, else cFYI(1, "Bytes read %d", rc); - file->f_path.dentry->d_inode->i_atime = - current_fs_time(file->f_path.dentry->d_inode->i_sb); + file_inode(file)->i_atime = + current_fs_time(file_inode(file)->i_sb); if (PAGE_CACHE_SIZE > rc) memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc); @@ -3381,7 +3381,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, SetPageUptodate(page); /* send this page to the cache */ - cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page); + cifs_readpage_to_fscache(file_inode(file), page); rc = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ed6208ff85a7..1fc864b92cf2 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -289,7 +289,7 @@ cifs_get_file_info_unix(struct file *filp) unsigned int xid; FILE_UNIX_BASIC_INFO find_data; struct cifs_fattr fattr; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -558,7 +558,7 @@ cifs_get_file_info(struct file *filp) unsigned int xid; FILE_ALL_INFO find_data; struct cifs_fattr fattr; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); @@ -1678,7 +1678,7 @@ cifs_invalidate_mapping(struct inode *inode) int cifs_revalidate_file_attr(struct file *filp) { int rc = 0; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; if (!cifs_inode_needs_reval(inode)) @@ -1735,7 +1735,7 @@ out: int cifs_revalidate_file(struct file *filp) { int rc; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); rc = cifs_revalidate_file_attr(filp); if (rc) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index fd5009d56f9f..6c9f1214cf0b 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -30,7 +30,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { - struct inode *inode = filep->f_dentry->d_inode; + struct inode *inode = file_inode(filep); int rc = -ENOTTY; /* strange error - but the precedent */ unsigned int xid; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index cdd6ff48246b..7255b0c7aa7e 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -505,7 +505,7 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode) whether we can use the cached search results from the previous search */ static int is_dir_changed(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsInfo = CIFS_I(inode); if (cifsInfo->time == 0) @@ -778,7 +778,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) switch ((int) file->f_pos) { case 0: if (filldir(direntry, ".", 1, file->f_pos, - file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) { + file_inode(file)->i_ino, DT_DIR) < 0) { cERROR(1, "Filldir for current dir failed"); rc = -ENOMEM; break; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 49fe52d25600..b7d3a05c062c 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -397,7 +397,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) * We can't use vfs_readdir because we have to keep the file * position in sync between the coda_file and the host_file. * and as such we need grab the inode mutex. */ - struct inode *host_inode = host_file->f_path.dentry->d_inode; + struct inode *host_inode = file_inode(host_file); mutex_lock(&host_inode->i_mutex); host_file->f_pos = coda_file->f_pos; diff --git a/fs/coda/file.c b/fs/coda/file.c index 8edd404e6419..fa4c100bdc7d 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -66,7 +66,7 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos, static ssize_t coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos) { - struct inode *host_inode, *coda_inode = coda_file->f_path.dentry->d_inode; + struct inode *host_inode, *coda_inode = file_inode(coda_file); struct coda_file_info *cfi; struct file *host_file; ssize_t ret; @@ -78,7 +78,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo if (!host_file->f_op || !host_file->f_op->write) return -EINVAL; - host_inode = host_file->f_path.dentry->d_inode; + host_inode = file_inode(host_file); mutex_lock(&coda_inode->i_mutex); ret = host_file->f_op->write(host_file, buf, count, ppos); @@ -106,8 +106,8 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) if (!host_file->f_op || !host_file->f_op->mmap) return -ENODEV; - coda_inode = coda_file->f_path.dentry->d_inode; - host_inode = host_file->f_path.dentry->d_inode; + coda_inode = file_inode(coda_file); + host_inode = file_inode(host_file); cii = ITOC(coda_inode); spin_lock(&cii->c_lock); @@ -178,7 +178,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, coda_file->f_cred->fsuid); - host_inode = cfi->cfi_container->f_path.dentry->d_inode; + host_inode = file_inode(cfi->cfi_container); cii = ITOC(coda_inode); /* did we mmap this file? */ @@ -202,7 +202,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync) { struct file *host_file; - struct inode *coda_inode = coda_file->f_path.dentry->d_inode; + struct inode *coda_inode = file_inode(coda_file); struct coda_file_info *cfi; int err; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index be2aa4909487..6df708c7b3e8 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -129,7 +129,7 @@ static int get_device_index(struct coda_mount_data *data) f = fdget(data->fd); if (!f.file) goto Ebadf; - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { fdput(f); goto Ebadf; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index ee0981f1375b..3f5de96bbb58 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -52,7 +52,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd, struct path path; int error; struct PioctlData data; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct inode *target_inode = NULL; struct coda_inode_info *cnp; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index e2f57a007029..3ced75f765ca 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1582,7 +1582,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, case FIBMAP: case FIGETBSZ: case FIONREAD: - if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) + if (S_ISREG(file_inode(f.file)->i_mode)) break; /*FALL THROUGH*/ diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 712b10f64c70..90d222f11e36 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1625,7 +1625,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(file)->i_mutex); return -EINVAL; } if (offset != file->f_pos) { diff --git a/fs/coredump.c b/fs/coredump.c index 177493272a61..69baf903d3bd 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -411,7 +411,7 @@ static void wait_for_dump_helpers(struct file *file) { struct pipe_inode_info *pipe; - pipe = file->f_path.dentry->d_inode->i_pipe; + pipe = file_inode(file)->i_pipe; pipe_lock(pipe); pipe->readers++; @@ -600,7 +600,7 @@ void do_coredump(siginfo_t *siginfo) if (IS_ERR(cprm.file)) goto fail_unlock; - inode = cprm.file->f_path.dentry->d_inode; + inode = file_inode(cprm.file); if (inode->i_nlink > 1) goto close_fail; if (d_unhashed(cprm.file->f_path.dentry)) diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c6c3f91ecf06..3ceb9ec976e1 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -351,7 +351,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; char *buf; unsigned int offset; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index d45ba4568128..53acc9d0c138 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -118,7 +118,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) lower_file = ecryptfs_file_to_lower(file); lower_file->f_pos = file->f_pos; - inode = file->f_path.dentry->d_inode; + inode = file_inode(file); memset(&buf, 0, sizeof(buf)); buf.dirent = dirent; buf.dentry = file->f_path.dentry; @@ -133,7 +133,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) goto out; if (rc >= 0) fsstack_copy_attr_atime(inode, - lower_file->f_path.dentry->d_inode); + file_inode(lower_file)); out: return rc; } diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 7ee6f7e3a608..055a9e9ca747 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -20,7 +20,7 @@ const struct inode_operations efs_dir_inode_operations = { }; static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct buffer_head *bh; struct efs_dir *dirblock; diff --git a/fs/exec.c b/fs/exec.c index 20df02c1cc70..7b6f4d59b26c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -123,7 +123,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; error = -EINVAL; - if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + if (!S_ISREG(file_inode(file)->i_mode)) goto exit; error = -EACCES; @@ -764,7 +764,7 @@ struct file *open_exec(const char *name) goto out; err = -EACCES; - if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + if (!S_ISREG(file_inode(file)->i_mode)) goto exit; if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) @@ -1098,7 +1098,7 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0) + if (inode_permission(file_inode(file), MAY_READ) < 0) bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; } EXPORT_SYMBOL(would_dump); @@ -1270,7 +1270,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm) int prepare_binprm(struct linux_binprm *bprm) { umode_t mode; - struct inode * inode = bprm->file->f_path.dentry->d_inode; + struct inode * inode = file_inode(bprm->file); int retval; mode = inode->i_mode; diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index c61e62ac231c..46375896cfc0 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -242,7 +242,7 @@ static int exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) { loff_t pos = filp->f_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 0f4f5c929257..4237722bfd27 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -290,7 +290,7 @@ static int ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) { loff_t pos = filp->f_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 2de655f5d625..5d46c09863f0 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -19,7 +19,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; unsigned short rsv_window_size; diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index dd91264ba94f..87eccbbca255 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -99,7 +99,7 @@ static int ext3_readdir(struct file * filp, int i, stored; struct ext3_dir_entry_2 *de; int err; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int ret = 0; int dir_has_error = 0; @@ -114,7 +114,7 @@ static int ext3_readdir(struct file * filp, * We don't set the inode dirty flag since it's not * critical that it get flushed back to the disk. */ - EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; + EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL; } stored = 0; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -457,7 +457,7 @@ static int call_filldir(struct file * filp, void * dirent, { struct dir_private_info *info = filp->private_data; loff_t curr_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block * sb; int error; @@ -487,7 +487,7 @@ static int ext3_dx_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dir_private_info *info = filp->private_data; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct fname *fname; int ret; diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 677a5c27dc69..4d96e9a64532 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -14,7 +14,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ext3_inode_info *ei = EXT3_I(inode); unsigned int flags; unsigned short rsv_window_size; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 890b8947c546..61fa09eb2501 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -624,7 +624,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); - dir = dir_file->f_path.dentry->d_inode; + dir = file_inode(dir_file); if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) @@ -638,7 +638,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, } hinfo.hash = start_hash; hinfo.minor_hash = 0; - frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); + frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err); if (!frame) return err; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 80a28b297279..dc149d123de5 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp, int i, stored; struct ext4_dir_entry_2 *de; int err; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int ret = 0; int dir_has_error = 0; @@ -133,7 +133,7 @@ static int ext4_readdir(struct file *filp, * We don't set the inode dirty flag since it's not * critical that it get flushed back to the disk. */ - ext4_clear_inode_flag(filp->f_path.dentry->d_inode, + ext4_clear_inode_flag(file_inode(filp), EXT4_INODE_INDEX); } stored = 0; @@ -494,7 +494,7 @@ static int call_filldir(struct file *filp, void *dirent, { struct dir_private_info *info = filp->private_data; loff_t curr_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb; int error; @@ -526,7 +526,7 @@ static int ext4_dx_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct dir_private_info *info = filp->private_data; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct fname *fname; int ret; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5ae1674ec12f..7817ca7c2bbf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4386,7 +4386,7 @@ static void ext4_falloc_update_inode(struct inode *inode, */ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); handle_t *handle; loff_t new_size; unsigned int max_blocks; @@ -4643,7 +4643,7 @@ static int ext4_xattr_fiemap(struct inode *inode, */ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 405565a62277..c00ea7945eb5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -167,7 +167,7 @@ static ssize_t ext4_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; /* diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 387c47c6cda9..8106dca95456 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1298,7 +1298,7 @@ int ext4_read_inline_dir(struct file *filp, int i, stored; struct ext4_dir_entry_2 *de; struct super_block *sb; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int ret, inline_size = 0; struct ext4_iloc iloc; void *dir_buf = NULL; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbfe13bf5b2a..521bd4ab8abe 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2959,7 +2959,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private, int ret, bool is_async) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ext4_io_end_t *io_end = iocb->private; /* if not async direct IO or dio with 0 bytes write, just return */ @@ -3553,7 +3553,7 @@ int ext4_can_truncate(struct inode *inode) int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; @@ -4926,7 +4926,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long len; int ret; struct file *file = vma->vm_file; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; handle_t *handle; get_block_t *get_block; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5747f52f7c72..c2f8e060f636 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -22,7 +22,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index d9cc5ee42f53..796f7ac03706 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -900,7 +900,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, int data_offset_in_page, int block_len_in_page, int uninit, int *err) { - struct inode *orig_inode = o_filp->f_dentry->d_inode; + struct inode *orig_inode = file_inode(o_filp); struct page *pagep[2] = {NULL, NULL}; handle_t *handle; ext4_lblk_t orig_blk_offset; @@ -1279,8 +1279,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_start, __u64 donor_start, __u64 len, __u64 *moved_len) { - struct inode *orig_inode = o_filp->f_dentry->d_inode; - struct inode *donor_inode = d_filp->f_dentry->d_inode; + struct inode *orig_inode = file_inode(o_filp); + struct inode *donor_inode = file_inode(d_filp); struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; ext4_lblk_t block_start = orig_start; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f9ed946a448e..bb97ad6905b2 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -937,7 +937,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); - dir = dir_file->f_path.dentry->d_inode; + dir = file_inode(dir_file); if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3d4fb81bacd5..4df78dd3f523 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -553,7 +553,7 @@ void ext4_error_file(struct file *file, const char *function, va_list args; struct va_format vaf; struct ext4_super_block *es; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); char pathname[80], *path; es = EXT4_SB(inode->i_sb)->s_es; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 951ed52748f6..fda0bcc0907f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -603,7 +603,7 @@ bool f2fs_empty_dir(struct inode *dir) static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) { unsigned long pos = file->f_pos; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); unsigned char *types = NULL; unsigned int bit_pos = 0, start_bit_pos = 0; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 58bf744dbf39..165012ef363a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -698,7 +698,7 @@ out: static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return __fat_readdir(inode, filp, dirent, filldir, 0, 0); } @@ -779,7 +779,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp, static long fat_dir_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; int short_only, both; @@ -819,7 +819,7 @@ FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent) static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct compat_dirent __user *d1 = compat_ptr(arg); int short_only, both; diff --git a/fs/fat/file.c b/fs/fat/file.c index a62e0ecbe2db..3978f8ca1823 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -32,7 +32,7 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); int is_dir = S_ISDIR(inode->i_mode); u32 attr, oldattr; @@ -116,7 +116,7 @@ out: long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); u32 __user *user_attr = (u32 __user *)arg; switch (cmd) { diff --git a/fs/fcntl.c b/fs/fcntl.c index 71a600a19f06..6599222536eb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -30,7 +30,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) { - struct inode * inode = filp->f_path.dentry->d_inode; + struct inode * inode = file_inode(filp); int error = 0; /* diff --git a/fs/file_table.c b/fs/file_table.c index de9e9653d611..0f607ce89acc 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -447,7 +447,7 @@ void mark_files_ro(struct super_block *sb) lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + if (!S_ISREG(file_inode(f)->i_mode)) continue; if (!file_count(f)) continue; diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index bd447e88f208..664b07a53870 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -237,7 +237,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) static int vxfs_readdir(struct file *fp, void *retp, filldir_t filler) { - struct inode *ip = fp->f_path.dentry->d_inode; + struct inode *ip = file_inode(fp); struct super_block *sbp = ip->i_sb; u_long bsize = sbp->s_blocksize; u_long page, npages, block, pblocks, nblocks, offset; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 75a20c092dd4..b7978b9f75ef 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) { struct fuse_conn *fc; mutex_lock(&fuse_mutex); - fc = file->f_path.dentry->d_inode->i_private; + fc = file_inode(file)->i_private; if (fc) fc = fuse_conn_get(fc); mutex_unlock(&fuse_mutex); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40c..80ba3950c40d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1160,7 +1160,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) int err; size_t nbytes; struct page *page; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 991ab2d484dd..44543df9f400 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -157,7 +157,7 @@ static const u32 gfs2_to_fsflags[32] = { static int gfs2_get_flags(struct file *filp, u32 __user *ptr) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; @@ -217,7 +217,7 @@ void gfs2_set_inode_flags(struct inode *inode) */ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; @@ -293,7 +293,7 @@ out_drop_write: static int gfs2_set_flags(struct file *filp, u32 __user *ptr) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); u32 fsflags, gfsflags; if (get_user(fsflags, ptr)) @@ -336,7 +336,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) { - struct inode *inode = filep->f_dentry->d_inode; + struct inode *inode = file_inode(filep); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; @@ -386,7 +386,7 @@ static int gfs2_allocate_page_backing(struct page *page) static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned long last_index; @@ -673,8 +673,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; size_t writesize = iov_length(iov, nr_segs); - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; ret = gfs2_rs_alloc(ip); @@ -772,7 +771,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; @@ -938,7 +937,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; - struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); + struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; int flags; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b7eff078fe90..04af1cf7ae34 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1257,7 +1257,7 @@ fail: int gfs2_fitrim(struct file *filp, void __user *argp) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); struct buffer_head *bh; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 422dde2ec0a1..5f7f1abd5f6d 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -51,7 +51,7 @@ done: */ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int len, err; char strbuf[HFS_MAX_NAMELEN]; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d47f11658c17..3031dfdd2358 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -128,7 +128,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; + struct inode *inode = file_inode(file)->i_mapping->host; ssize_t ret; ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 6b9f921ef2fa..074e04589248 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -122,7 +122,7 @@ fail: static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int len, err; char strbuf[HFSPLUS_MAX_STRLEN + 1]; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 799b336b59f9..dcd05be5344b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -124,7 +124,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; + struct inode *inode = file_inode(file)->i_mapping->host; ssize_t ret; ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 09addc8615fa..e3c4c4209428 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -59,7 +59,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags = 0; @@ -75,7 +75,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags; int err = 0; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 457addc5c91f..ba6de25771ac 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -30,7 +30,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) return list_entry(inode, struct hostfs_inode_info, vfs_inode); } -#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) +#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) static int hostfs_d_delete(const struct dentry *dentry) { diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 78e12b2e0ea2..546f6d39713a 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -25,7 +25,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) loff_t new_off = off + (whence == 1 ? filp->f_pos : 0); loff_t pos; struct quad_buffer_head qbh; - struct inode *i = filp->f_path.dentry->d_inode; + struct inode *i = file_inode(filp); struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct super_block *s = i->i_sb; @@ -57,7 +57,7 @@ fail: static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); struct quad_buffer_head qbh; struct hpfs_dirent *de; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index fbfe2df5624b..9f9dbeceeee7 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -152,7 +152,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf, retval = do_sync_write(file, buf, count, ppos); if (retval > 0) { hpfs_lock(file->f_path.dentry->d_sb); - hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; + hpfs_i(file_inode(file))->i_dirty = 1; hpfs_unlock(file->f_path.dentry->d_sb); } return retval; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 43b315f2002b..74f55703be49 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -180,7 +180,7 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t n; - read = file->f_path.dentry->d_inode->i_fop->read; + read = file_inode(file)->i_fop->read; if (!is_user) set_fs(KERNEL_DS); @@ -288,7 +288,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf, struct file *proc_file = data->proc_file; ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - write = proc_file->f_path.dentry->d_inode->i_fop->write; + write = file_inode(proc_file)->i_fop->write; return (*write)(proc_file, buf, len, ppos); } @@ -513,7 +513,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where) loff_t (*llseek)(struct file *, loff_t, int); loff_t ret; - llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; + llseek = file_inode(proc_file)->i_fop->llseek; if (llseek != NULL) { ret = (*llseek)(proc_file, off, where); if (ret < 0) @@ -561,7 +561,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) }); int err; - readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; + readdir = file_inode(proc_file)->i_fop->readdir; proc_file->f_pos = file->f_pos; err = (*readdir)(proc_file, &dirent, hppfs_filldir); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 78bde32ea951..edb42ea60c76 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -97,7 +97,7 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); diff --git a/fs/inode.c b/fs/inode.c index 14084b72b259..67880e604399 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(file_remove_suid); int file_update_time(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct timespec now; int sync_it = 0; int ret; diff --git a/fs/ioctl.c b/fs/ioctl.c index 3bdad6d1f268..fd507fb460f8 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -175,7 +175,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) struct fiemap fiemap; struct fiemap __user *ufiemap = (struct fiemap __user *) arg; struct fiemap_extent_info fieinfo = { 0, }; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; u64 len; int error; @@ -424,7 +424,7 @@ EXPORT_SYMBOL(generic_block_fiemap); */ int ioctl_preallocate(struct file *filp, void __user *argp) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct space_resv sr; if (copy_from_user(&sr, argp, sizeof(sr))) @@ -449,7 +449,7 @@ int ioctl_preallocate(struct file *filp, void __user *argp) static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int __user *p = (int __user *)arg; switch (cmd) { @@ -512,7 +512,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, static int ioctl_fsfreeze(struct file *filp) { - struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; + struct super_block *sb = file_inode(filp)->i_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -527,7 +527,7 @@ static int ioctl_fsfreeze(struct file *filp) static int ioctl_fsthaw(struct file *filp) { - struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; + struct super_block *sb = file_inode(filp)->i_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -548,7 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, { int error = 0; int __user *argp = (int __user *)arg; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); switch (cmd) { case FIOCLEX: diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 0b3fa7974fa8..592e5115a561 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -296,7 +296,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount, */ static int zisofs_readpage(struct file *file, struct page *page) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; int err; int i, pcount, full_page; diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index f20437c068a0..a7d5c3c3d4e6 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -253,7 +253,7 @@ static int isofs_readdir(struct file *filp, int result; char *tmpname; struct iso_directory_record *tmpde; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); tmpname = (char *)__get_free_page(GFP_KERNEL); if (tmpname == NULL) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index ad7774d32095..acd46a4160cb 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -117,12 +117,12 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct jffs2_inode_info *f; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct jffs2_full_dirent *fd; unsigned long offset, curofs; jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", - filp->f_path.dentry->d_inode->i_ino); + file_inode(filp)->i_ino); f = JFFS2_INODE_INFO(inode); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index bc555ff417e9..93a1232894f6 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -58,7 +58,7 @@ static long jfs_map_ext2(unsigned long flags, int from) long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct jfs_inode_info *jfs_inode = JFS_IP(inode); unsigned int flags; diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9197a1b0d02d..0ddbeceafc62 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3004,7 +3004,7 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) */ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *ip = filp->f_path.dentry->d_inode; + struct inode *ip = file_inode(filp); struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; int rc = 0; loff_t dtpos; /* legacy OS/2 style position */ diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ca0a08001449..a2717408c478 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -178,7 +178,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) continue; if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; - if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) + if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0) continue; /* Alright, we found a lock. Set the return status * and wake up the caller diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 54f9e6ce0430..d7e1ec1c6827 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -127,7 +127,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) struct nlm_lock *lock = &argp->lock; nlmclnt_next_cookie(&argp->cookie); - memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); + memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); lock->caller = utsname()->nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 8d80c990dffd..e703318c41df 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -406,8 +406,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", - file->f_file->f_path.dentry->d_inode->i_sb->s_id, - file->f_file->f_path.dentry->d_inode->i_ino, + file_inode(file->f_file)->i_sb->s_id, + file_inode(file->f_file)->i_ino, lock->fl.fl_type, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, @@ -513,8 +513,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, __be32 ret; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", - file->f_file->f_path.dentry->d_inode->i_sb->s_id, - file->f_file->f_path.dentry->d_inode->i_ino, + file_inode(file->f_file)->i_sb->s_id, + file_inode(file->f_file)->i_ino, lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -606,8 +606,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) int error; dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", - file->f_file->f_path.dentry->d_inode->i_sb->s_id, - file->f_file->f_path.dentry->d_inode->i_ino, + file_inode(file->f_file)->i_sb->s_id, + file_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -635,8 +635,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l int status = 0; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", - file->f_file->f_path.dentry->d_inode->i_sb->s_id, - file->f_file->f_path.dentry->d_inode->i_ino, + file_inode(file->f_file)->i_sb->s_id, + file_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0deb5f6c9dd4..b3a24b07d981 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) { - struct inode *inode = file->f_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file->f_file); dprintk("lockd: %s %s/%ld\n", msg, inode->i_sb->s_id, inode->i_ino); diff --git a/fs/locks.c b/fs/locks.c index a94e331a52a2..cb424a4fed71 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -334,7 +334,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, start = filp->f_pos; break; case SEEK_END: - start = i_size_read(filp->f_path.dentry->d_inode); + start = i_size_read(file_inode(filp)); break; default: return -EINVAL; @@ -384,7 +384,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, start = filp->f_pos; break; case SEEK_END: - start = i_size_read(filp->f_path.dentry->d_inode); + start = i_size_read(file_inode(filp)); break; default: return -EINVAL; @@ -627,7 +627,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) struct file_lock *cfl; lock_flocks(); - for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { + for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { if (!IS_POSIX(cfl)) continue; if (posix_locks_conflict(fl, cfl)) @@ -708,7 +708,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock **before; - struct inode * inode = filp->f_path.dentry->d_inode; + struct inode * inode = file_inode(filp); int error = 0; int found = 0; @@ -1002,7 +1002,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); + return __posix_lock_file(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); @@ -1326,8 +1326,8 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; lock_flocks(); - time_out_leases(filp->f_path.dentry->d_inode); - for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); + time_out_leases(file_inode(filp)); + for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); fl = fl->fl_next) { if (fl->fl_file == filp) { type = target_leasetype(fl); @@ -1843,7 +1843,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, if (copy_from_user(&flock, l, sizeof(flock))) goto out; - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); /* Don't allow mandatory locks on files that may be memory mapped * and shared. @@ -1961,7 +1961,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, if (copy_from_user(&flock, l, sizeof(flock))) goto out; - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); /* Don't allow mandatory locks on files that may be memory mapped * and shared. @@ -2030,7 +2030,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - if (!filp->f_path.dentry->d_inode->i_flock) + if (!file_inode(filp)->i_flock) return; lock.fl_type = F_UNLCK; @@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(locks_remove_posix); */ void locks_remove_flock(struct file *filp) { - struct inode * inode = filp->f_path.dentry->d_inode; + struct inode * inode = file_inode(filp); struct file_lock *fl; struct file_lock **before; @@ -2152,7 +2152,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, fl_pid = fl->fl_pid; if (fl->fl_file != NULL) - inode = fl->fl_file->f_path.dentry->d_inode; + inode = file_inode(fl->fl_file); seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 26e4a941532f..b82751082112 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -284,7 +284,7 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) #define IMPLICIT_NODES 2 static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) { - struct inode *dir = file->f_dentry->d_inode; + struct inode *dir = file_inode(file); loff_t pos = file->f_pos - IMPLICIT_NODES; struct page *page; struct logfs_disk_dentry *dd; @@ -320,7 +320,7 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); ino_t pino = parent_ino(file->f_dentry); int err; diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 3886cded283c..c2219a6dd3c8 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -183,7 +183,7 @@ static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this) long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct logfs_inode *li = logfs_inode(inode); unsigned int oldflags, flags; int err; diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 685b2d981b87..a9ed6f36e6ea 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -85,7 +85,7 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned long pos = filp->f_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; unsigned offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; diff --git a/fs/namei.c b/fs/namei.c index 43a97ee1d4c8..df00b754631d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2778,7 +2778,7 @@ retry_lookup: goto out; if ((*opened & FILE_CREATED) || - !S_ISREG(file->f_path.dentry->d_inode->i_mode)) + !S_ISREG(file_inode(file)->i_mode)) will_truncate = false; audit_inode(name, file->f_path.dentry, 0); diff --git a/fs/namespace.c b/fs/namespace.c index 269919fa116d..50ca17d3cb45 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -384,7 +384,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int __mnt_want_write_file(struct file *file) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) return __mnt_want_write(file->f_path.mnt); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 1acdad7fcec7..c41e02932542 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -525,7 +525,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) if (!ncp_filp) goto out; error = -ENOTSOCK; - sock_inode = ncp_filp->f_path.dentry->d_inode; + sock_inode = file_inode(ncp_filp); if (!S_ISSOCK(sock_inode->i_mode)) goto out_fput; sock = SOCKET_I(sock_inode); @@ -564,7 +564,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) if (!server->info_filp) goto out_bdi; error = -ENOTSOCK; - sock_inode = server->info_filp->f_path.dentry->d_inode; + sock_inode = file_inode(server->info_filp); if (!S_ISSOCK(sock_inode->i_mode)) goto out_fput2; info_sock = SOCKET_I(sock_inode); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 6958adfaff08..5c1e9262219c 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -808,7 +808,7 @@ outrel: long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct ncp_server *server = NCP_SERVER(inode); uid_t uid = current_uid(); int need_drop_write = 0; diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 63d14a99483d..ee24df5af1f9 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -105,7 +105,7 @@ static const struct vm_operations_struct ncp_file_mmap = /* This is used for a general mmap of a ncp file */ int ncp_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); DPRINTK("ncp_mmap: called\n"); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1b2d7eb93796..a8bd28cde7e2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -281,7 +281,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des for (i = 0; i < array->size; i++) { if (array->array[i].cookie == *desc->dir_cookie) { - struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); struct nfs_open_dir_context *ctx = desc->file->private_data; new_pos = desc->current_index + i; @@ -629,7 +629,7 @@ out: static int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) { - struct inode *inode = desc->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(desc->file); int ret; ret = nfs_readdir_xdr_to_array(desc, page, inode); @@ -660,7 +660,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, + return read_cache_page(file_inode(desc->file)->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); } @@ -764,7 +764,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, { struct page *page = NULL; int status; - struct inode *inode = desc->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(desc->file); struct nfs_open_dir_context *ctx = desc->file->private_data; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3c2b893665ba..29f4a48a0ee6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -292,7 +292,7 @@ static int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index bc3968fa81e5..2ad8deaf7dbf 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -764,7 +764,7 @@ out: static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); + struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; struct key_construction *cons; struct idmap_msg im; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ebeb94ce1b0b..548ae3113005 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -714,7 +714,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context); */ void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct nfs_inode *nfsi = NFS_I(inode); filp->private_data = get_nfs_open_context(ctx); @@ -747,7 +747,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 70efb63b1e42..43ea96ced28c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -872,7 +872,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 08ddcccb8887..13e6bb3e3fe5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -94,7 +94,7 @@ static int nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f084dac948e1..fc8de9016acf 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -662,7 +662,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) static int nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index e761ee95617f..497584c70366 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -101,7 +101,7 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, loff_t pos = *ppos; if (!pos) - nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); + nfsd_inject_get(file_inode(file)->i_private, &val); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); if (pos < 0) @@ -133,10 +133,10 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); if (size > 0) - nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); + nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); else { val = simple_strtoll(write_buf, NULL, 0); - nfsd_inject_set(file->f_dentry->d_inode->i_private, val); + nfsd_inject_set(file_inode(file)->i_private, val); } return len; /* on success, claim we got the whole input */ } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 74934284d9a7..2db7021b01ae 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -85,7 +85,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { - ino_t ino = file->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d586117fa94a..a94245b4045f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -979,7 +979,7 @@ static void kill_suid(struct dentry *dentry) */ static int wait_for_concurrent_writes(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); static ino_t last_ino; static dev_t last_dev; int err = 0; @@ -1070,7 +1070,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) return err; - inode = file->f_path.dentry->d_inode; + inode = file_inode(file); /* Get readahead parameters */ ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); @@ -1957,7 +1957,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, offset = *offsetp; while (1) { - struct inode *dir_inode = file->f_path.dentry->d_inode; + struct inode *dir_inode = file_inode(file); unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index df1a7fb238d1..f30b017740a7 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -259,7 +259,7 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { loff_t pos = filp->f_pos; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 61946883025c..89dc0886387d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -67,7 +67,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; int ret = 0; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index fdb180769485..ef61c749641d 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -793,7 +793,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); void __user *argp = (void __user *)arg; switch (cmd) { diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 08b886f119ce..2bfe6dc413a0 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -174,7 +174,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) struct dnotify_struct **prev; struct inode *inode; - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) return; @@ -296,7 +296,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } /* dnotify only works on directories */ - inode = filp->f_path.dentry->d_inode; + inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) { error = -ENOTDIR; goto out_err; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9ff4a5ee6e20..5d8444268a16 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -466,7 +466,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, ret = -ENOTDIR; if ((flags & FAN_MARK_ONLYDIR) && - !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { + !(S_ISDIR(file_inode(f.file)->i_mode))) { fdput(f); goto out; } diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 99e36107ff60..aa411c3f20e9 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1101,7 +1101,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; loff_t fpos, i_size; - struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode; + struct inode *bmp_vi, *vdir = file_inode(filp); struct super_block *sb = vdir->i_sb; ntfs_inode *ndir = NTFS_I(vdir); ntfs_volume *vol = NTFS_SB(sb); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 657743254eb9..db1ad26e02a7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -569,7 +569,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, int ret, bool is_async) { - struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); int level; wait_queue_head_t *wq = ocfs2_ioend_wq(inode); @@ -626,7 +626,7 @@ static ssize_t ocfs2_direct_IO(int rw, unsigned long nr_segs) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; + struct inode *inode = file_inode(file)->i_mapping->host; /* * Fallback to buffered I/O if we see an inode without diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8fe4e2892ab9..ac0d4a0e8a41 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2015,7 +2015,7 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int lock_level = 0; trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 16b712d260d4..4c5fc8d77dc2 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -224,7 +224,7 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) { int event = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct dlmfs_inode_private *ip = DLMFS_I(inode); poll_wait(file, &ip->ip_lockres.l_event, wait); @@ -245,7 +245,7 @@ static ssize_t dlmfs_file_read(struct file *filp, int bytes_left; ssize_t readlen, got; char *lvb_buf; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", inode->i_ino, count, *ppos); @@ -293,7 +293,7 @@ static ssize_t dlmfs_file_write(struct file *filp, int bytes_left; ssize_t writelen; char *lvb_buf; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", inode->i_ino, count, *ppos); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 37d313ede159..04098af9dbc8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1949,7 +1949,7 @@ out: int ocfs2_change_file_space(struct file *file, unsigned int cmd, struct ocfs2_space_resv *sr) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int ret; @@ -1977,7 +1977,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_space_resv sr; int change_size = 1; @@ -2232,7 +2232,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, loff_t old_size, *ppos = &iocb->ki_pos; u32 old_clusters; struct file *file = iocb->ki_filp; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2516,7 +2516,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, unsigned int flags) { int ret = 0, lock_level = 0; - struct inode *inode = in->f_path.dentry->d_inode; + struct inode *inode = file_inode(in); trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2546,7 +2546,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, { int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index f20edcbfe700..752f0b26221d 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -881,7 +881,7 @@ bail: long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned int flags; int new_clusters; int status; @@ -994,7 +994,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) { bool preserve; struct reflink_arguments args; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_info info; void __user *argp = (void __user *)arg; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 47a87dda54ce..07c585b85000 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -62,7 +62,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, struct page *page) { int ret = VM_FAULT_NOPAGE; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); unsigned int len = PAGE_CACHE_SIZE; @@ -131,7 +131,7 @@ out: static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; int ret; @@ -180,13 +180,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { int ret = 0, lock_level = 0; - ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, + ret = ocfs2_inode_lock_atime(file_inode(file), file->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto out; } - ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); + ocfs2_inode_unlock(file_inode(file), lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; return 0; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 6083432f667e..9f8dcadd9a50 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1055,7 +1055,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) { int status; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct ocfs2_move_extents range; struct ocfs2_move_extents_context *context = NULL; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 30a055049e16..1baffaadda41 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2927,7 +2927,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, u32 new_cluster, u32 new_len) { int ret = 0, partial; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_caching_info *ci = INODE_CACHE(inode); struct super_block *sb = ocfs2_metadata_cache_get_super(ci); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); @@ -3020,7 +3020,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, u32 new_cluster, u32 new_len) { int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct ocfs2_caching_info *ci = INODE_CACHE(inode); int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index fb5b3ff79dc6..acbaebcad3a8 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -330,7 +330,7 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, u64 fsblock, int hindex) { - struct inode *dir = filp->f_dentry->d_inode; + struct inode *dir = file_inode(filp); struct buffer_head *bh; struct omfs_inode *oi; u64 self; @@ -405,7 +405,7 @@ out: static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *dir = filp->f_dentry->d_inode; + struct inode *dir = file_inode(filp); struct buffer_head *bh; loff_t offset, res; unsigned int hchain, hindex; diff --git a/fs/open.c b/fs/open.c index 9b33c0cbfacf..e08643feb574 100644 --- a/fs/open.c +++ b/fs/open.c @@ -228,7 +228,7 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64); int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); long ret; if (offset < 0 || len <= 0) @@ -426,7 +426,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!f.file) goto out; - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f, f->f_mode = FMODE_PATH; path_get(&f->f_path); - inode = f->f_path.dentry->d_inode; + inode = file_inode(f); if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, f->f_path.mnt); if (error) diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 2ad080faca34..ae47fa7efb9d 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -262,7 +262,7 @@ found: static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct op_inode_info *oi = OP_I(inode); struct device_node *dp = oi->u.node; struct device_node *child; diff --git a/fs/pipe.c b/fs/pipe.c index bd3479db4b62..39baf6c3ebb0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -361,7 +361,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, unsigned long nr_segs, loff_t pos) { struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pipe_inode_info *pipe; int do_wakeup; ssize_t ret; @@ -486,7 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, unsigned long nr_segs, loff_t ppos) { struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pipe_inode_info *pipe; ssize_t ret; int do_wakeup; @@ -677,7 +677,7 @@ bad_pipe_w(struct file *filp, const char __user *buf, size_t count, static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pipe_inode_info *pipe; int count, buf, nrbufs; @@ -705,7 +705,7 @@ static unsigned int pipe_poll(struct file *filp, poll_table *wait) { unsigned int mask; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pipe_inode_info *pipe = inode->i_pipe; int nrbufs; @@ -758,7 +758,7 @@ pipe_release(struct inode *inode, int decr, int decw) static int pipe_read_fasync(int fd, struct file *filp, int on) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int retval; mutex_lock(&inode->i_mutex); @@ -772,7 +772,7 @@ pipe_read_fasync(int fd, struct file *filp, int on) static int pipe_write_fasync(int fd, struct file *filp, int on) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int retval; mutex_lock(&inode->i_mutex); @@ -786,7 +786,7 @@ pipe_write_fasync(int fd, struct file *filp, int on) static int pipe_rdwr_fasync(int fd, struct file *filp, int on) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct pipe_inode_info *pipe = inode->i_pipe; int retval; @@ -1226,7 +1226,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, */ struct pipe_inode_info *get_pipe_info(struct file *file) { - struct inode *i = file->f_path.dentry->d_inode; + struct inode *i = file_inode(file); return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b43ff77a51e..760268d6cba6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -383,7 +383,7 @@ static int lstats_open(struct inode *inode, struct file *file) static ssize_t lstats_write(struct file *file, const char __user *buf, size_t count, loff_t *offs) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; @@ -602,7 +602,7 @@ static const struct inode_operations proc_def_inode_operations = { static ssize_t proc_info_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); unsigned long page; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -668,7 +668,7 @@ static const struct file_operations proc_single_file_operations = { static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; if (!task) @@ -869,7 +869,7 @@ static const struct file_operations proc_environ_operations = { static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; int oom_adj = OOM_ADJUST_MIN; size_t len; @@ -916,7 +916,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, goto out; } - task = get_proc_task(file->f_path.dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) { err = -ESRCH; goto out; @@ -976,7 +976,7 @@ static const struct file_operations proc_oom_adj_operations = { static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; short oom_score_adj = OOM_SCORE_ADJ_MIN; unsigned long flags; @@ -1019,7 +1019,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto out; } - task = get_proc_task(file->f_path.dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) { err = -ESRCH; goto out; @@ -1067,7 +1067,7 @@ static const struct file_operations proc_oom_score_adj_operations = { static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; @@ -1084,7 +1084,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *page, *tmp; ssize_t length; uid_t loginuid; @@ -1142,7 +1142,7 @@ static const struct file_operations proc_loginuid_operations = { static ssize_t proc_sessionid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; @@ -1165,7 +1165,7 @@ static const struct file_operations proc_sessionid_operations = { static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; @@ -1197,7 +1197,7 @@ static ssize_t proc_fault_inject_write(struct file * file, make_it_fail = simple_strtol(strstrip(buffer), &end, 0); if (*end) return -EINVAL; - task = get_proc_task(file->f_dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; @@ -1237,7 +1237,7 @@ static ssize_t sched_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct task_struct *p; p = get_proc_task(inode); @@ -1288,7 +1288,7 @@ static ssize_t sched_autogroup_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct task_struct *p; char buffer[PROC_NUMBUF]; int nice; @@ -1343,7 +1343,7 @@ static const struct file_operations proc_pid_sched_autogroup_operations = { static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN]; @@ -2146,7 +2146,7 @@ out_no_task: static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -2167,7 +2167,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *page; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -2256,7 +2256,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = { static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; char buffer[PROC_NUMBUF]; size_t len; @@ -2308,7 +2308,7 @@ static ssize_t proc_coredump_filter_write(struct file *file, goto out_no_task; ret = -ESRCH; - task = get_proc_task(file->f_dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 76ddae83daa5..7dfe548a28e8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -42,7 +42,7 @@ static ssize_t __proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *page; ssize_t retval=0; int eof=0; @@ -188,7 +188,7 @@ static ssize_t proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; spin_lock(&pde->pde_unload_lock); @@ -209,7 +209,7 @@ static ssize_t proc_file_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (pde->write_proc) { @@ -460,7 +460,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, { unsigned int ino; int i; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int ret = 0; ino = inode->i_ino; @@ -522,7 +522,7 @@ out: int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return proc_readdir_de(PDE(inode), filp, dirent, filldir); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 439ae6886507..38f5c119b806 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -144,7 +144,7 @@ void pde_users_dec(struct proc_dir_entry *pde) static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; loff_t (*llseek)(struct file *, loff_t, int); @@ -179,7 +179,7 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); @@ -201,7 +201,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); @@ -223,7 +223,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned int rv = DEFAULT_POLLMASK; unsigned int (*poll)(struct file *, struct poll_table_struct *); @@ -245,7 +245,7 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; long (*ioctl)(struct file *, unsigned int, unsigned long); @@ -268,7 +268,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne #ifdef CONFIG_COMPAT static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; long (*compat_ioctl)(struct file *, unsigned int, unsigned long); @@ -291,7 +291,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) { - struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; int (*mmap)(struct file *, struct vm_area_struct *); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index b1822dde55c2..ccfd99bd1c5a 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -45,7 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) file = region->vm_file; if (file) { - struct inode *inode = region->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(region->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index fe72cd073dea..75df0d731110 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -163,7 +163,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent, struct net *net; ret = -EINVAL; - net = get_proc_task_net(filp->f_path.dentry->d_inode); + net = get_proc_task_net(file_inode(filp)); if (net != NULL) { ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); put_net(net); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1827d88ad58b..612df79cc6a1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -478,7 +478,7 @@ out: static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, size_t count, loff_t *ppos, int write) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; ssize_t error; @@ -542,7 +542,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp) static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; unsigned int ret = DEFAULT_POLLMASK; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ca5ce7f9f800..3e636d864d56 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -271,7 +271,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) const char *name = NULL; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; @@ -743,7 +743,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return rv; if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; - task = get_proc_task(file->f_path.dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); @@ -1015,7 +1015,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; struct pagemapread pm; int ret = -ESRCH; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1ccfa537f5f5..56123a6f462e 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -149,7 +149,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, file = vma->vm_file; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 7b0329468a5d..28ce014b3cef 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -16,7 +16,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned int offset; struct buffer_head *bh; struct qnx4_inode_entry *de; diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index dc597353db3b..8798d065e400 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -117,7 +117,7 @@ static int qnx6_dir_longfilename(struct inode *inode, static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *s = inode->i_sb; struct qnx6_sb_info *sbi = QNX6_SB(s); loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index d5378d028589..8d5b438cc188 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -202,7 +202,7 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long pgoff, unsigned long flags) { unsigned long maxpages, lpages, nr, loop, ret; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct page **pages = NULL, **ptr, *page; loff_t isize; diff --git a/fs/read_write.c b/fs/read_write.c index bb34af315280..3ae6dbe828bf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(no_llseek); loff_t default_llseek(struct file *file, loff_t offset, int whence) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); loff_t retval; mutex_lock(&inode->i_mutex); @@ -290,7 +290,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count loff_t pos; int retval = -EINVAL; - inode = file->f_path.dentry->d_inode; + inode = file_inode(file); if (unlikely((ssize_t) count < 0)) return retval; pos = *ppos; @@ -901,8 +901,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, if (!(out.file->f_mode & FMODE_WRITE)) goto fput_out; retval = -EINVAL; - in_inode = in.file->f_path.dentry->d_inode; - out_inode = out.file->f_path.dentry->d_inode; + in_inode = file_inode(in.file); + out_inode = file_inode(out.file); retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); if (retval < 0) goto fput_out; diff --git a/fs/readdir.c b/fs/readdir.c index 5e69ef533b77..fee38e04fae4 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -22,7 +22,7 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int res = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) goto out; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 50302d6f8895..6165bd4784f6 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -268,7 +268,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t * new current position before returning. */ ) { - struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. + struct inode *inode = file_inode(file); // Inode of the file that we are writing to. /* To simplify coding at this time, we store locked pages in array for now */ struct reiserfs_transaction_handle th; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 0c2185042d5f..15cb5fe6b425 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -21,7 +21,7 @@ */ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); unsigned int flags; int err = 0; diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index e60e87035bb3..9cc0740adffa 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -281,7 +281,7 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb) } #if defined( REISERFS_USE_OIDMAPF ) if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { - loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size; + loff_t size = file_inode(sb_info->oidmap.mapf)->i_size; total_used += size / sizeof(reiserfs_oidinterval_d_t); } #endif diff --git a/fs/romfs/super.c b/fs/romfs/super.c index fd7c5f60b46b..7e8d3a80bdab 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -147,7 +147,7 @@ static const struct address_space_operations romfs_aops = { */ static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *i = filp->f_dentry->d_inode; + struct inode *i = file_inode(filp); struct romfs_inode ri; unsigned long offset, maxoff; int j, ino, nextfh; diff --git a/fs/splice.c b/fs/splice.c index 6909d89d0da5..963213d56403 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1170,7 +1170,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * randomly drop data for eg socket -> socket splicing. Use the * piped splicing for that! */ - i_mode = in->f_path.dentry->d_inode->i_mode; + i_mode = file_inode(in)->i_mode; if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) return -EINVAL; diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index b381305c9a47..57dc70ebbb19 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -102,7 +102,7 @@ static int get_dir_index_using_offset(struct super_block *sb, static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; int offset = squashfs_i(inode)->offset, length, dir_count, size, diff --git a/fs/sync.c b/fs/sync.c index 14eefeb44636..2c5d6639a66a 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -332,7 +332,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, if (!f.file) goto out; - i_mode = f.file->f_path.dentry->d_inode->i_mode; + i_mode = file_inode(f.file)->i_mode; ret = -ESPIPE; if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && !S_ISLNK(i_mode)) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 614b2b544880..2ce9a5db6ab5 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -70,7 +70,7 @@ static ssize_t read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) { struct bin_buffer *bb = file->private_data; - int size = file->f_path.dentry->d_inode->i_size; + int size = file_inode(file)->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); char *temp; @@ -140,7 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, size_t bytes, loff_t *off) { struct bin_buffer *bb = file->private_data; - int size = file->f_path.dentry->d_inode->i_size; + int size = file_inode(file)->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); char *temp; @@ -469,7 +469,7 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd) mutex_lock(&sysfs_bin_lock); hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { - struct inode *inode = bb->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(bb->file); unmap_mapping_range(inode->i_mapping, 0, 0, 1); } diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index a77c42157620..3799e8dac3eb 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -68,7 +68,7 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n) static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned long pos = filp->f_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; unsigned offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 8a574776a493..de08c92f2e23 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -352,7 +352,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; - struct inode *dir = file->f_path.dentry->d_inode; + struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5bc77817f382..fa5b347ec729 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1444,7 +1444,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 1a7e2d8bdbe9..648b143606cc 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -147,7 +147,7 @@ out_unlock: long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int flags, err; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); switch (cmd) { case FS_IOC_GETFLAGS: diff --git a/fs/udf/dir.c b/fs/udf/dir.c index eb8bfe2b89a5..b3e93f5e17c3 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -186,7 +186,7 @@ out: static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *dir = filp->f_path.dentry->d_inode; + struct inode *dir = file_inode(filp); int result; if (filp->f_pos == 0) { diff --git a/fs/udf/file.c b/fs/udf/file.c index 77b5953eaac8..4257a1f5302a 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -139,7 +139,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { ssize_t retval; struct file *file = iocb->ki_filp; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int err, pos; size_t count = iocb->ki_left; struct udf_inode_info *iinfo = UDF_I(inode); @@ -178,7 +178,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); long old_block, new_block; int result = -EINVAL; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index dbc90994715a..3a75ca09c506 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -433,7 +433,7 @@ static int ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) { loff_t pos = filp->f_pos; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d0e9c74d3d96..75d854b0c439 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -78,14 +78,14 @@ xfs_swapext( goto out_put_tmp_file; } - if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || - IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { + if (IS_SWAPFILE(file_inode(f.file)) || + IS_SWAPFILE(file_inode(tmp.file))) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } - ip = XFS_I(f.file->f_path.dentry->d_inode); - tip = XFS_I(tmp.file->f_path.dentry->d_inode); + ip = XFS_I(file_inode(f.file)); + tip = XFS_I(file_inode(tmp.file)); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 67284edb84d7..f03bf1a456fb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -811,7 +811,7 @@ xfs_file_fallocate( loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); long error; loff_t new_size = 0; xfs_flock64_t bf; @@ -912,7 +912,7 @@ xfs_file_readdir( void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); xfs_inode_t *ip = XFS_I(inode); int error; size_t bufsize; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c1c3ef88a260..d681e34c2950 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -80,7 +80,7 @@ xfs_find_handle( f = fdget(hreq->fd); if (!f.file) return -EBADF; - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); } else { error = user_lpath((const char __user *)hreq->path, &path); if (error) @@ -168,7 +168,7 @@ xfs_handle_to_dentry( /* * Only allow handle opens under a directory. */ - if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) + if (!S_ISDIR(file_inode(parfilp)->i_mode)) return ERR_PTR(-ENOTDIR); if (hlen != sizeof(xfs_handle_t)) @@ -1334,7 +1334,7 @@ xfs_file_ioctl( unsigned int cmd, unsigned long p) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; void __user *arg = (void __user *)p; diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 1244274a5674..63b8fc432151 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -530,7 +530,7 @@ xfs_file_compat_ioctl( unsigned cmd, unsigned long p) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; void __user *arg = (void __user *)p; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7617ee04f066..3ab69777b4d8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2217,6 +2217,11 @@ static inline bool execute_ok(struct inode *inode) return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); } +static inline struct inode *file_inode(struct file *f) +{ + return f->f_path.dentry->d_inode; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. @@ -2239,7 +2244,7 @@ static inline int get_write_access(struct inode *inode) } static inline int deny_write_access(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline void put_write_access(struct inode * inode) @@ -2249,7 +2254,7 @@ static inline void put_write_access(struct inode * inode) static inline void allow_write_access(struct file *file) { if (file) - atomic_inc(&file->f_path.dentry->d_inode->i_writecount); + atomic_inc(&file_inode(file)->i_writecount); } #ifdef CONFIG_IMA static inline void i_readcount_dec(struct inode *inode) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 0fbfb4646d1b..a78680a92dba 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -244,7 +244,7 @@ static inline void fsnotify_open(struct file *file) static inline void fsnotify_close(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); fmode_t mode = file->f_mode; __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0c80d3f57a5b..70832951f97c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -281,7 +281,7 @@ static inline struct hstate *hstate_inode(struct inode *i) static inline struct hstate *hstate_file(struct file *f) { - return hstate_inode(f->f_dentry->d_inode); + return hstate_inode(file_inode(f)); } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f5a051a79273..0e62d84f9f7f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -291,7 +291,7 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return file->f_file->f_path.dentry->d_inode; + return file_inode(file->f_file); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 71a3ca18c873..963a8709f5fb 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -477,7 +477,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry) static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, size_t count, loff_t *off) { - struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); + struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); char buffer[FILENT_SIZE]; ssize_t ret; @@ -498,13 +498,13 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, if (ret <= 0) return ret; - filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME; + file_inode(filp)->i_atime = file_inode(filp)->i_ctime = CURRENT_TIME; return ret; } static int mqueue_flush_file(struct file *filp, fl_owner_t id) { - struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); + struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); spin_lock(&info->lock); if (task_tgid(current) == info->notify_owner) @@ -516,7 +516,7 @@ static int mqueue_flush_file(struct file *filp, fl_owner_t id) static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab) { - struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); + struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp)); int retval = 0; poll_wait(filp, &info->wait_q, poll_tab); @@ -973,7 +973,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, goto out; } - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; @@ -1089,7 +1089,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, goto out; } - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; @@ -1249,7 +1249,7 @@ retry: goto out; } - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; @@ -1323,7 +1323,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, goto out; } - inode = f.file->f_path.dentry->d_inode; + inode = file_inode(f.file); if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; diff --git a/ipc/shm.c b/ipc/shm.c index 4fa6d8fee730..8c9402d298a0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -193,7 +193,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); else if (shp->mlock_user) - user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, + user_shm_unlock(file_inode(shp->shm_file)->i_size, shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); @@ -529,7 +529,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. */ - file->f_dentry->d_inode->i_ino = shp->shm_perm.id; + file_inode(file)->i_ino = shp->shm_perm.id; ns->shm_tot += numpages; error = shp->shm_perm.id; @@ -678,7 +678,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, { struct inode *inode; - inode = shp->shm_file->f_path.dentry->d_inode; + inode = file_inode(shp->shm_file); if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; @@ -1173,7 +1173,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { - size = vma->vm_file->f_path.dentry->d_inode->i_size; + size = file_inode(vma->vm_file)->i_size; do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); /* * We discovered the size of the shm segment, so diff --git a/kernel/acct.c b/kernel/acct.c index 051e071a06e7..0d2981358e08 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -205,7 +205,7 @@ static int acct_on(struct filename *pathname) if (IS_ERR(file)) return PTR_ERR(file); - if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { + if (!S_ISREG(file_inode(file)->i_mode)) { filp_close(file, NULL); return -EACCES; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4855892798fd..4fe52b3b6ef6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2637,7 +2637,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, un */ static inline struct cftype *__file_cft(struct file *file) { - if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations) + if (file_inode(file)->i_fop != &cgroup_file_operations) return ERR_PTR(-EINVAL); return __d_cft(file->f_dentry); } @@ -3852,7 +3852,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, /* the process need read permission on control file */ /* AV: shouldn't we check that it's been opened for read instead? */ - ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ); + ret = inode_permission(file_inode(cfile), MAY_READ); if (ret < 0) goto fail; @@ -5441,7 +5441,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) struct inode *inode; struct cgroup_subsys_state *css; - inode = f->f_dentry->d_inode; + inode = file_inode(f); /* check in cgroup filesystem dir */ if (inode->i_op != &cgroup_dir_inode_operations) return ERR_PTR(-EBADF); diff --git a/kernel/events/core.c b/kernel/events/core.c index 301079d06f24..3b106554b42e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3682,7 +3682,7 @@ unlock: static int perf_fasync(int fd, struct file *filp, int on) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct perf_event *event = filp->private_data; int retval; diff --git a/kernel/fork.c b/kernel/fork.c index c535f33bbb9c..4ff724f81f25 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -413,7 +413,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct address_space *mapping = file->f_mapping; get_file(file); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 4bd4faa6323a..397db02209ed 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -76,7 +76,7 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; + unsigned int irq = (int)(long)PDE(file_inode(file))->data; cpumask_var_t new_value; int err; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 78e2ecb20165..c057104bf05c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -251,7 +251,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) return PTR_ERR(file); err = -EINVAL; - ei = PROC_I(file->f_dentry->d_inode); + ei = PROC_I(file_inode(file)); ops = ei->ns_ops; if (nstype && (ops->type != nstype)) goto out; diff --git a/kernel/relay.c b/kernel/relay.c index e8cd2027abbd..01ab081ac53a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1139,7 +1139,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, if (!desc->count) return 0; - mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); + mutex_lock(&file_inode(filp)->i_mutex); do { if (!relay_file_read_avail(buf, *ppos)) break; @@ -1159,7 +1159,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, *ppos = relay_file_read_end_pos(buf, read_start, ret); } } while (desc->count && ret); - mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&file_inode(filp)->i_mutex); return desc->written; } diff --git a/kernel/sys.c b/kernel/sys.c index 265b37690421..e3932ea50ec8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1792,14 +1792,14 @@ SYSCALL_DEFINE1(umask, int, mask) static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; - struct dentry *dentry; + struct inode *inode; int err; exe = fdget(fd); if (!exe.file) return -EBADF; - dentry = exe.file->f_path.dentry; + inode = file_inode(exe.file); /* * Because the original mm->exe_file points to executable file, make @@ -1807,11 +1807,11 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) * overall picture. */ err = -EACCES; - if (!S_ISREG(dentry->d_inode->i_mode) || + if (!S_ISREG(inode->i_mode) || exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - err = inode_permission(dentry->d_inode, MAY_EXEC); + err = inode_permission(inode, MAY_EXEC); if (err) goto exit; diff --git a/mm/fadvise.c b/mm/fadvise.c index a47f0f50c89f..6deaa6c04636 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -38,7 +38,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) if (!f.file) return -EBADF; - if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) { + if (S_ISFIFO(file_inode(f.file)->i_mode)) { ret = -ESPIPE; goto out; } diff --git a/mm/filemap.c b/mm/filemap.c index 83efee76a5c0..6a48a7ea8f4f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1711,7 +1711,7 @@ EXPORT_SYMBOL(filemap_fault); int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4f3ea0b1e57c..b97e806e5d9a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -127,7 +127,7 @@ static inline struct hugepage_subpool *subpool_inode(struct inode *inode) static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) { - return subpool_inode(vma->vm_file->f_dentry->d_inode); + return subpool_inode(file_inode(vma->vm_file)); } /* @@ -2482,7 +2482,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - mapping = vma->vm_file->f_dentry->d_inode->i_mapping; + mapping = file_inode(vma->vm_file)->i_mapping; /* * Take the mapping lock for the duration of the table walk. As diff --git a/mm/mmap.c b/mm/mmap.c index 35730ee9d515..22dfc01e9681 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -202,7 +202,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) { if (vma->vm_flags & VM_DENYWRITE) - atomic_inc(&file->f_path.dentry->d_inode->i_writecount); + atomic_inc(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) mapping->i_mmap_writable--; @@ -567,7 +567,7 @@ static void __vma_link_file(struct vm_area_struct *vma) struct address_space *mapping = file->f_mapping; if (vma->vm_flags & VM_DENYWRITE) - atomic_dec(&file->f_path.dentry->d_inode->i_writecount); + atomic_dec(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) mapping->i_mmap_writable++; @@ -1217,7 +1217,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return -EAGAIN; } - inode = file ? file->f_path.dentry->d_inode : NULL; + inode = file ? file_inode(file) : NULL; if (file) { switch (flags & MAP_TYPE) { @@ -1403,7 +1403,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, int error; struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; - struct inode *inode = file ? file->f_path.dentry->d_inode : NULL; + struct inode *inode = file ? file_inode(file) : NULL; /* Clear old maps */ error = -ENOMEM; diff --git a/mm/nommu.c b/mm/nommu.c index 79c3cac87afa..f87d2173d0d0 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -941,7 +941,7 @@ static int validate_mmap_request(struct file *file, */ mapping = file->f_mapping; if (!mapping) - mapping = file->f_path.dentry->d_inode->i_mapping; + mapping = file_inode(file)->i_mapping; capabilities = 0; if (mapping && mapping->backing_dev_info) @@ -950,7 +950,7 @@ static int validate_mmap_request(struct file *file, if (!capabilities) { /* no explicit capabilities set, so assume some * defaults */ - switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { + switch (file_inode(file)->i_mode & S_IFMT) { case S_IFREG: case S_IFBLK: capabilities = BDI_CAP_MAP_COPY; @@ -985,11 +985,11 @@ static int validate_mmap_request(struct file *file, !(file->f_mode & FMODE_WRITE)) return -EACCES; - if (IS_APPEND(file->f_path.dentry->d_inode) && + if (IS_APPEND(file_inode(file)) && (file->f_mode & FMODE_WRITE)) return -EACCES; - if (locks_verify_locked(file->f_path.dentry->d_inode)) + if (locks_verify_locked(file_inode(file))) return -EAGAIN; if (!(capabilities & BDI_CAP_MAP_DIRECT)) @@ -1322,8 +1322,8 @@ unsigned long do_mmap_pgoff(struct file *file, continue; /* search for overlapping mappings on the same file */ - if (pregion->vm_file->f_path.dentry->d_inode != - file->f_path.dentry->d_inode) + if (file_inode(pregion->vm_file) != + file_inode(file)) continue; if (pregion->vm_pgoff >= pgend) diff --git a/mm/shmem.c b/mm/shmem.c index 5dd56f6efdbd..814d5546cb35 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1295,7 +1295,7 @@ unlock: static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); int error; int ret = VM_FAULT_LOCKED; @@ -1313,14 +1313,14 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #ifdef CONFIG_NUMA static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); pgoff_t index; index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; @@ -1330,7 +1330,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, int shmem_lock(struct file *file, int lock, struct user_struct *user) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct shmem_inode_info *info = SHMEM_I(inode); int retval = -ENOMEM; @@ -1465,7 +1465,7 @@ shmem_write_end(struct file *file, struct address_space *mapping, static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct address_space *mapping = inode->i_mapping; pgoff_t index; unsigned long offset; @@ -1808,7 +1808,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_falloc shmem_falloc; pgoff_t start, index, end; diff --git a/mm/swapfile.c b/mm/swapfile.c index e97a0e5aea91..ed393002fc09 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1699,7 +1699,7 @@ static int swap_show(struct seq_file *swap, void *v) len = seq_path(swap, &file->f_path, " \t\n\\"); seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", len < 40 ? 40 - len : 1, " ", - S_ISBLK(file->f_path.dentry->d_inode->i_mode) ? + S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", si->pages << (PAGE_SHIFT - 10), si->inuse_pages << (PAGE_SHIFT - 10), diff --git a/net/atm/proc.c b/net/atm/proc.c index 0d020de8d233..2518b5f8bb8a 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE(file->f_path.dentry->d_inode)->data; + dev = PDE(file_inode(file))->data; if (!dev->ops->proc_read) length = -EINVAL; else { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8acce01b6dab..80e271d9e64b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -344,7 +344,7 @@ struct net *get_net_ns_by_fd(int fd) if (IS_ERR(file)) return ERR_CAST(file); - ei = PROC_I(file->f_dentry->d_inode); + ei = PROC_I(file_inode(file)); if (ei->ns_ops == &netns_operations) net = get_net(ei->ns); else diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 75e33a7048f8..5852b249054f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { - struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; + struct clusterip_config *c = PDE(file_inode(file))->data; #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; unsigned long nodenum; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 978efc9b555a..c3cdcb47c149 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -540,7 +540,7 @@ static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + const struct proc_dir_entry *pde = PDE(file_inode(file)); struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c0353d55d56f..c1ee3a8cf111 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -809,7 +809,7 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) struct sock *netlink_getsockbyfilp(struct file *filp) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct sock *sock; if (!S_ISSOCK(inode->i_mode)) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6e5c824b040b..294b4bf7ec6e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -616,7 +616,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; - struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe; + struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe; struct gss_cl_ctx *ctx; uid_t uid; ssize_t err = -EFBIG; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9afa4393c217..f3897d10f649 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -755,7 +755,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int err; if (count == 0) @@ -886,7 +886,7 @@ static ssize_t cache_write(struct file *filp, const char __user *buf, struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t ret = -EINVAL; if (!cd->cache_parse) @@ -1454,7 +1454,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_read(filp, buf, count, ppos, cd); } @@ -1462,14 +1462,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf, static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return cache_poll(filp, wait, cd); } @@ -1477,7 +1477,7 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct cache_detail *cd = PDE(inode)->data; return cache_ioctl(inode, filp, cmd, arg, cd); @@ -1546,7 +1546,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp) static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return read_flush(filp, buf, count, ppos, cd); } @@ -1555,7 +1555,7 @@ static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct cache_detail *cd = PDE(file_inode(filp))->data; return write_flush(filp, buf, count, ppos, cd); } @@ -1686,7 +1686,7 @@ EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_read(filp, buf, count, ppos, cd); } @@ -1694,14 +1694,14 @@ static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_write(filp, buf, count, ppos, cd); } static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_poll(filp, wait, cd); } @@ -1709,7 +1709,7 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct cache_detail *cd = RPC_I(inode)->private; return cache_ioctl(inode, filp, cmd, arg, cd); @@ -1778,7 +1778,7 @@ static int release_flush_pipefs(struct inode *inode, struct file *filp) static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return read_flush(filp, buf, count, ppos, cd); } @@ -1787,7 +1787,7 @@ static ssize_t write_flush_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + struct cache_detail *cd = RPC_I(file_inode(filp))->private; return write_flush(filp, buf, count, ppos, cd); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fd10981ea792..7b9b40224a27 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -284,7 +284,7 @@ out: static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; struct rpc_pipe_msg *msg; int res = 0; @@ -328,7 +328,7 @@ out_unlock: static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); int res; mutex_lock(&inode->i_mutex); @@ -342,7 +342,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of static unsigned int rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); unsigned int mask = POLLOUT | POLLWRNORM; @@ -360,7 +360,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) static long rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct rpc_pipe *pipe; int len; @@ -830,7 +830,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * responses to upcalls. They will result in calls to @msg->downcall. * * The @private argument passed here will be available to all these methods - * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. + * from the file pointer, via RPC_I(file_inode(file))->private. */ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, void *private, struct rpc_pipe *pipe) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index b6f4b994eb35..d0f6545b0010 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -99,7 +99,7 @@ unsigned int unix_tot_inflight; struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); /* * Socket ? diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 60f0c76a27d3..859abdaac1ea 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -349,8 +349,8 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm) unsigned int state; struct file_perms perms = {}; struct path_cond cond = { - bprm->file->f_path.dentry->d_inode->i_uid, - bprm->file->f_path.dentry->d_inode->i_mode + file_inode(bprm->file)->i_uid, + file_inode(bprm->file)->i_mode }; const char *name = NULL, *target = NULL, *info = NULL; int error = cap_bprm_set_creds(bprm); diff --git a/security/apparmor/file.c b/security/apparmor/file.c index cd21ec5b90af..fdaa50cb1876 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -449,8 +449,8 @@ int aa_file_perm(int op, struct aa_profile *profile, struct file *file, u32 request) { struct path_cond cond = { - .uid = file->f_path.dentry->d_inode->i_uid, - .mode = file->f_path.dentry->d_inode->i_mode + .uid = file_inode(file)->i_uid, + .mode = file_inode(file)->i_mode }; return aa_path_perm(op, profile, &file->f_path, PATH_DELEGATE_DELETED, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8c2a7f6b35e2..b21830eced41 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -379,7 +379,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred) struct aa_profile *profile; int error = 0; - if (!mediated_filesystem(file->f_path.dentry->d_inode)) + if (!mediated_filesystem(file_inode(file))) return 0; /* If in exec, permission is handled by bprm hooks. @@ -394,7 +394,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred) profile = aa_cred_profile(cred); if (!unconfined(profile)) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct path_cond cond = { inode->i_uid, inode->i_mode }; error = aa_path_perm(OP_OPEN, profile, &file->f_path, 0, @@ -432,7 +432,7 @@ static int common_file_perm(int op, struct file *file, u32 mask) BUG_ON(!fprofile); if (!file->f_path.mnt || - !mediated_filesystem(file->f_path.dentry->d_inode)) + !mediated_filesystem(file_inode(file))) return 0; profile = __aa_current_profile(); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 0cea3db21657..27cb9eb42cc8 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -140,12 +140,12 @@ int ima_must_measure(struct inode *inode, int mask, int function) int ima_collect_measurement(struct integrity_iint_cache *iint, struct file *file) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); const char *filename = file->f_dentry->d_name.name; int result = 0; if (!(iint->flags & IMA_COLLECTED)) { - u64 i_version = file->f_dentry->d_inode->i_version; + u64 i_version = file_inode(file)->i_version; iint->ima_xattr.type = IMA_XATTR_DIGEST; result = ima_calc_hash(file, iint->ima_xattr.digest); @@ -182,7 +182,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint, const char *op = "add_template_measure"; const char *audit_cause = "ENOMEM"; int result = -ENOMEM; - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct ima_template_entry *entry; int violation = 0; diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index b21ee5b5495a..81dcaa26401e 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -63,7 +63,7 @@ int ima_calc_hash(struct file *file, char *digest) file->f_mode |= FMODE_READ; read = 1; } - i_size = i_size_read(file->f_dentry->d_inode); + i_size = i_size_read(file_inode(file)); while (offset < i_size) { int rbuf_len; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index dba965de90d3..e7a147f7d371 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -132,7 +132,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, */ void ima_file_free(struct file *file) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; if (!iint_initialized || !S_ISREG(inode->i_mode)) @@ -148,7 +148,7 @@ void ima_file_free(struct file *file) static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; unsigned char *pathname = NULL, *pathbuf = NULL; int rc = -ENOMEM, action, must_appraise; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 61a53367d029..2963c689f9c0 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1528,7 +1528,7 @@ static int file_has_perm(const struct cred *cred, u32 av) { struct file_security_struct *fsec = file->f_security; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct common_audit_data ad; u32 sid = cred_sid(cred); int rc; @@ -1957,7 +1957,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) struct task_security_struct *new_tsec; struct inode_security_struct *isec; struct common_audit_data ad; - struct inode *inode = bprm->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(bprm->file); int rc; rc = cap_bprm_set_creds(bprm); @@ -2929,7 +2929,7 @@ static void selinux_inode_getsecid(const struct inode *inode, u32 *secid) static int selinux_revalidate_file_permission(struct file *file, int mask) { const struct cred *cred = current_cred(); - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */ if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) @@ -2941,7 +2941,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) static int selinux_file_permission(struct file *file, int mask) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct file_security_struct *fsec = file->f_security; struct inode_security_struct *isec = inode->i_security; u32 sid = current_sid(); @@ -3218,7 +3218,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred) struct inode_security_struct *isec; fsec = file->f_security; - isec = file->f_path.dentry->d_inode->i_security; + isec = file_inode(file)->i_security; /* * Save inode label and policy sequence number * at open-time so that selinux_file_permission diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 3a6e8731646c..ff427733c290 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -202,7 +202,7 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf, { char tmpbuf[TMPBUFLEN]; ssize_t length; - ino_t ino = filp->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(filp)->i_ino; int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ? security_get_reject_unknown() : !security_get_allow_unknown(); @@ -671,7 +671,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { static ssize_t selinux_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { - ino_t ino = file->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; @@ -1042,8 +1042,7 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, ssize_t length; ssize_t ret; int cur_enforcing; - struct inode *inode = filep->f_path.dentry->d_inode; - unsigned index = inode->i_ino & SEL_INO_MASK; + unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; mutex_lock(&sel_mutex); @@ -1077,8 +1076,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, char *page = NULL; ssize_t length; int new_value; - struct inode *inode = filep->f_path.dentry->d_inode; - unsigned index = inode->i_ino & SEL_INO_MASK; + unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; mutex_lock(&sel_mutex); @@ -1486,13 +1484,11 @@ static int sel_make_avc_files(struct dentry *dir) static ssize_t sel_read_initcon(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct inode *inode; char *con; u32 sid, len; ssize_t ret; - inode = file->f_path.dentry->d_inode; - sid = inode->i_ino&SEL_INO_MASK; + sid = file_inode(file)->i_ino&SEL_INO_MASK; ret = security_sid_to_context(sid, &con, &len); if (ret) return ret; @@ -1553,7 +1549,7 @@ static inline u32 sel_ino_to_perm(unsigned long ino) static ssize_t sel_read_class(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned long ino = file->f_path.dentry->d_inode->i_ino; + unsigned long ino = file_inode(file)->i_ino; char res[TMPBUFLEN]; ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino)); return simple_read_from_buffer(buf, count, ppos, res, len); @@ -1567,7 +1563,7 @@ static const struct file_operations sel_class_ops = { static ssize_t sel_read_perm(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned long ino = file->f_path.dentry->d_inode->i_ino; + unsigned long ino = file_inode(file)->i_ino; char res[TMPBUFLEN]; ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino)); return simple_read_from_buffer(buf, count, ppos, res, len); @@ -1584,7 +1580,7 @@ static ssize_t sel_read_policycap(struct file *file, char __user *buf, int value; char tmpbuf[TMPBUFLEN]; ssize_t length; - unsigned long i_ino = file->f_path.dentry->d_inode->i_ino; + unsigned long i_ino = file_inode(file)->i_ino; value = security_policycap_supported(i_ino & SEL_INO_MASK); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 38be92ce901e..fa64740abb59 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -456,7 +456,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_bprm_set_creds(struct linux_binprm *bprm) { - struct inode *inode = bprm->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(bprm->file); struct task_smack *bsp = bprm->cred->security; struct inode_smack *isp; int rc; @@ -1187,21 +1187,15 @@ static int smack_mmap_file(struct file *file, char *msmack; char *osmack; struct inode_smack *isp; - struct dentry *dp; int may; int mmay; int tmay; int rc; - if (file == NULL || file->f_dentry == NULL) - return 0; - - dp = file->f_dentry; - - if (dp->d_inode == NULL) + if (file == NULL) return 0; - isp = dp->d_inode->i_security; + isp = file_inode(file)->i_security; if (isp->smk_mmap == NULL) return 0; msmack = isp->smk_mmap; @@ -1359,7 +1353,7 @@ static int smack_file_receive(struct file *file) */ static int smack_file_open(struct file *file, const struct cred *cred) { - struct inode_smack *isp = file->f_path.dentry->d_inode->i_security; + struct inode_smack *isp = file_inode(file)->i_security; file->f_security = isp->smk_inode; diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index 8592f2fc6ebb..fcf32783b66b 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -135,7 +135,7 @@ static const struct file_operations tomoyo_self_operations = { */ static int tomoyo_open(struct inode *inode, struct file *file) { - const int key = ((u8 *) file->f_path.dentry->d_inode->i_private) + const int key = ((u8 *) file_inode(file)->i_private) - ((u8 *) NULL); return tomoyo_open_control(key, file); } diff --git a/sound/core/info.c b/sound/core/info.c index 6b368d25073b..5bb97e7d325a 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -496,7 +496,7 @@ static long snd_info_entry_ioctl(struct file *file, unsigned int cmd, static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct snd_info_private_data *data; struct snd_info_entry *entry; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 09b4286c65f9..71ae86ca64ac 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1586,7 +1586,7 @@ static struct file *snd_pcm_file_fd(int fd, int *fput_needed) file = fget_light(fd, fput_needed); if (!file) return NULL; - inode = file->f_path.dentry->d_inode; + inode = file_inode(file); if (!S_ISCHR(inode->i_mode) || imajor(inode) != snd_major) { fput_light(file, *fput_needed); diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index 536c4c0514d3..11ff7c55240c 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -642,7 +642,7 @@ static int mixer_ioctl(unsigned int cmd, unsigned long arg) static long dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int minor = iminor(file->f_path.dentry->d_inode); + int minor = iminor(file_inode(file)); int ret; if (cmd == OSS_GETVERSION) { @@ -1012,7 +1012,7 @@ static int dsp_write(const char __user *buf, size_t len) static ssize_t dev_read(struct file *file, char __user *buf, size_t count, loff_t *off) { - int minor = iminor(file->f_path.dentry->d_inode); + int minor = iminor(file_inode(file)); if (minor == dev.dsp_minor) return dsp_read(buf, count); else @@ -1021,7 +1021,7 @@ static ssize_t dev_read(struct file *file, char __user *buf, size_t count, loff_ static ssize_t dev_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - int minor = iminor(file->f_path.dentry->d_inode); + int minor = iminor(file_inode(file)); if (minor == dev.dsp_minor) return dsp_write(buf, count); else diff --git a/sound/oss/soundcard.c b/sound/oss/soundcard.c index 7c7793a0eb25..e7780349cc55 100644 --- a/sound/oss/soundcard.c +++ b/sound/oss/soundcard.c @@ -143,7 +143,7 @@ static int get_mixer_levels(void __user * arg) static ssize_t sound_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); int ret = -EINVAL; /* @@ -176,7 +176,7 @@ static ssize_t sound_read(struct file *file, char __user *buf, size_t count, lof static ssize_t sound_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); int ret = -EINVAL; mutex_lock(&soundcard_mutex); @@ -333,7 +333,7 @@ static int sound_mixer_ioctl(int mixdev, unsigned int cmd, void __user *arg) static long sound_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int len = 0, dtype; - int dev = iminor(file->f_dentry->d_inode); + int dev = iminor(file_inode(file)); long ret = -EINVAL; void __user *p = (void __user *)arg; @@ -406,7 +406,7 @@ static long sound_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static unsigned int sound_poll(struct file *file, poll_table * wait) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); int dev = iminor(inode); DEB(printk("sound_poll(dev=%d)\n", dev)); @@ -431,7 +431,7 @@ static int sound_mmap(struct file *file, struct vm_area_struct *vma) int dev_class; unsigned long size; struct dma_buffparms *dmap = NULL; - int dev = iminor(file->f_path.dentry->d_inode); + int dev = iminor(file_inode(file)); dev_class = dev & 0x0f; dev >>= 4; diff --git a/sound/sound_firmware.c b/sound/sound_firmware.c index 37711a5d0d6b..e14903468051 100644 --- a/sound/sound_firmware.c +++ b/sound/sound_firmware.c @@ -19,7 +19,7 @@ static int do_mod_firmware_load(const char *fn, char **fp) printk(KERN_INFO "Unable to load '%s'.\n", fn); return 0; } - l = i_size_read(filp->f_path.dentry->d_inode); + l = i_size_read(file_inode(filp)); if (l <= 0 || l > 131072) { printk(KERN_INFO "Invalid firmware '%s'\n", fn); -- cgit v1.2.3 From ad8ca3743cb7eef0eb8a2e87943c513201685eec Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Jan 2013 12:54:29 -0500 Subject: vfs: remove d_path_with_unreachable The last caller was removed >2 years ago in commit 7b2a69ba7. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/dcache.c | 31 ------------------------------- include/linux/dcache.h | 1 - 2 files changed, 32 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 19153a0a810c..8f15e8d71bba 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2722,37 +2722,6 @@ char *d_path(const struct path *path, char *buf, int buflen) } EXPORT_SYMBOL(d_path); -/** - * d_path_with_unreachable - return the path of a dentry - * @path: path to report - * @buf: buffer to return value in - * @buflen: buffer length - * - * The difference from d_path() is that this prepends "(unreachable)" - * to paths which are unreachable from the current process' root. - */ -char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) -{ - char *res = buf + buflen; - struct path root; - int error; - - if (path->dentry->d_op && path->dentry->d_op->d_dname) - return path->dentry->d_op->d_dname(path->dentry, buf, buflen); - - get_fs_root(current->fs, &root); - write_seqlock(&rename_lock); - error = path_with_deleted(path, &root, &res, &buflen); - if (error > 0) - error = prepend_unreachable(&res, &buflen); - write_sequnlock(&rename_lock); - path_put(&root); - if (error) - res = ERR_PTR(error); - - return res; -} - /* * Helper function for dentry_operations.d_dname() members */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c1754b59ddd3..63e96844a8e8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -333,7 +333,6 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); -extern char *d_path_with_unreachable(const struct path *, char *, int); extern char *dentry_path_raw(struct dentry *, char *, int); extern char *dentry_path(struct dentry *, char *, int); -- cgit v1.2.3 From a713ca2ab9d14dc5c86634bc445ce1f66552c169 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Jan 2013 18:27:00 -0500 Subject: constify __d_lookup() arguments Signed-off-by: Al Viro --- fs/dcache.c | 2 +- include/linux/dcache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 8f15e8d71bba..93d9f4e1a8ed 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1919,7 +1919,7 @@ EXPORT_SYMBOL(d_lookup); * * __d_lookup callers must be commented. */ -struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) +struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { unsigned int len = name->len; unsigned int hash = name->hash; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 63e96844a8e8..969086c03301 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -295,7 +295,7 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ extern struct dentry *d_lookup(struct dentry *, struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup(struct dentry *, struct qstr *); +extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq, struct inode *inode); -- cgit v1.2.3 From da2d8455ed7eb22d7642ecee43dc463ac42a1256 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Jan 2013 18:29:34 -0500 Subject: constify d_lookup() arguments Signed-off-by: Al Viro --- fs/dcache.c | 2 +- include/linux/dcache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 93d9f4e1a8ed..0e4b5fa6c9f9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1889,7 +1889,7 @@ seqretry: * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ -struct dentry *d_lookup(struct dentry *parent, struct qstr *name) +struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { struct dentry *dentry; unsigned seq; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 969086c03301..03d169288423 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -293,7 +293,7 @@ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry *d_lookup(struct dentry *, struct qstr *); +extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, -- cgit v1.2.3 From d778df51c09264076fe0208c099ef7d428f21790 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 22 Feb 2013 16:32:12 -0800 Subject: mm: vmscan: save work scanning (almost) empty LRU lists In certain cases (kswapd reclaim, memcg target reclaim), a fixed minimum amount of pages is scanned from the LRU lists on each iteration, to make progress. Do not make this minimum bigger than the respective LRU list size, however, and save some busy work trying to isolate and reclaim pages that are not there. Empty LRU lists are quite common with memory cgroups in NUMA environments because there exists a set of LRU lists for each zone for each memory cgroup, while the memory of a single cgroup is expected to stay on just one node. The number of expected empty LRU lists is thus memcgs * (nodes - 1) * lru types Each attempt to reclaim from an empty LRU list does expensive size comparisons between lists, acquires the zone's lru lock etc. Avoid that. Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Acked-by: Mel Gorman Reviewed-by: Michal Hocko Cc: Hugh Dickins Cc: Satoru Moriya Cc: Simon Jeons Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- mm/vmscan.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 68df9c17fbbb..8c66486a8ca8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -156,7 +156,7 @@ enum { SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; -#define SWAP_CLUSTER_MAX 32 +#define SWAP_CLUSTER_MAX 32UL #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* diff --git a/mm/vmscan.c b/mm/vmscan.c index ff842d9a7714..e4521ba1ddd0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1761,15 +1761,17 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, out: for_each_evictable_lru(lru) { int file = is_file_lru(lru); + unsigned long size; unsigned long scan; - scan = get_lru_size(lruvec, lru); + size = get_lru_size(lruvec, lru); if (sc->priority || noswap || !vmscan_swappiness(sc)) { - scan >>= sc->priority; + scan = size >> sc->priority; if (!scan && force_scan) - scan = SWAP_CLUSTER_MAX; + scan = min(size, SWAP_CLUSTER_MAX); scan = div64_u64(scan * fraction[file], denominator); - } + } else + scan = size; nr[lru] = scan; } } -- cgit v1.2.3 From 7103f16dbff20fa969c9500902d980d17f953fa6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 22 Feb 2013 16:32:33 -0800 Subject: mm: compaction: make __compact_pgdat() and compact_pgdat() return void These functions always return 0. Formalise this. Cc: Jason Liu Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Minchan Kim Cc: Rik van Riel Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 5 ++--- mm/compaction.c | 12 +++++------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index cc7bddeaf553..091d72e70d8a 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -23,7 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, bool sync, bool *contended); -extern int compact_pgdat(pg_data_t *pgdat, int order); +extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -80,9 +80,8 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, return COMPACT_CONTINUE; } -static inline int compact_pgdat(pg_data_t *pgdat, int order) +static inline void compact_pgdat(pg_data_t *pgdat, int order) { - return COMPACT_CONTINUE; } static inline void reset_isolation_suitable(pg_data_t *pgdat) diff --git a/mm/compaction.c b/mm/compaction.c index 0d0248db36d8..5d5b0b259bc9 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1085,7 +1085,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* Compact all zones within a node */ -static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) +static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) { int zoneid; struct zone *zone; @@ -1118,28 +1118,26 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) VM_BUG_ON(!list_empty(&cc->freepages)); VM_BUG_ON(!list_empty(&cc->migratepages)); } - - return 0; } -int compact_pgdat(pg_data_t *pgdat, int order) +void compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, .sync = false, }; - return __compact_pgdat(pgdat, &cc); + __compact_pgdat(pgdat, &cc); } -static int compact_node(int nid) +static void compact_node(int nid) { struct compact_control cc = { .order = -1, .sync = true, }; - return __compact_pgdat(NODE_DATA(nid), &cc); + __compact_pgdat(NODE_DATA(nid), &cc); } /* Compact all nodes in the system */ -- cgit v1.2.3 From bebeb3d68b24bb4132d452c5707fe321208bcbcd Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:37 -0800 Subject: mm: introduce mm_populate() for populating new vmas When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or with MCL_FUTURE in effect), we want to populate the pages within the newly created vmas. This may take a while as we may have to read pages from disk, so ideally we want to do this outside of the write-locked mmap_sem region. This change introduces mm_populate(), which is used to defer populating such mappings until after the mmap_sem write lock has been released. This is implemented as a generalization of the former do_mlock_pages(), which accomplished the same task but was using during mlock() / mlockall(). Signed-off-by: Michel Lespinasse Reported-by: Andy Lutomirski Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 6 +++++- include/linux/mm.h | 18 +++++++++++++++--- ipc/shm.c | 12 +++++++----- mm/mlock.c | 17 +++++++++++------ mm/mmap.c | 20 +++++++++++++++----- mm/nommu.c | 5 ++++- mm/util.c | 6 +++++- 7 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 71f613cf4a85..82eec7c7b4bb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx) unsigned nr_events = ctx->max_reqs; unsigned long size; int nr_pages; + bool populate; /* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ @@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx) down_write(&ctx->mm->mmap_sem); info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0); + MAP_ANONYMOUS|MAP_PRIVATE, 0, + &populate); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; @@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx) aio_free_ring(ctx); return -EAGAIN; } + if (populate) + mm_populate(info->mmap_base, info->mmap_size); ctx->user_id = info->mmap_base; diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d9dcc35d6a1..da0a0fe970c2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap_pgoff(struct file *, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, unsigned long flags, + unsigned long pgoff, bool *populate); extern int do_munmap(struct mm_struct *, unsigned long, size_t); +#ifdef CONFIG_MMU +extern int __mm_populate(unsigned long addr, unsigned long len, + int ignore_errors); +static inline void mm_populate(unsigned long addr, unsigned long len) +{ + /* Ignore errors */ + (void) __mm_populate(addr, len, 1); +} +#else +static inline void mm_populate(unsigned long addr, unsigned long len) {} +#endif + /* These take the mm semaphore themselves */ extern unsigned long vm_brk(unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); diff --git a/ipc/shm.c b/ipc/shm.c index 4fa6d8fee730..9f047ba69e62 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, unsigned long flags; unsigned long prot; int acc_mode; - unsigned long user_addr; struct ipc_namespace *ns; struct shm_file_data *sfd; struct path path; fmode_t f_mode; + bool populate = false; err = -EINVAL; if (shmid < 0) @@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto invalid; } - user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); - *raddr = user_addr; + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); + *raddr = addr; err = 0; - if (IS_ERR_VALUE(user_addr)) - err = (long)user_addr; + if (IS_ERR_VALUE(addr)) + err = (long)addr; invalid: up_write(¤t->mm->mmap_sem); + if (populate) + mm_populate(addr, size); out_fput: fput(file); diff --git a/mm/mlock.c b/mm/mlock.c index c9bd528b01d2..a296a49865df 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors) +/* + * __mm_populate - populate and/or mlock pages within a range of address space. + * + * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap + * flags. VMAs must be already marked with the desired vm_flags, and + * mmap_sem must not be held. + */ +int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; @@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) error = do_mlock(start, len, 1); up_write(¤t->mm->mmap_sem); if (!error) - error = do_mlock_pages(start, len, 0); + error = __mm_populate(start, len, 0); return error; } @@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags) capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); up_write(¤t->mm->mmap_sem); - if (!ret && (flags & MCL_CURRENT)) { - /* Ignore errors */ - do_mlock_pages(0, TASK_SIZE, 1); - } + if (!ret && (flags & MCL_CURRENT)) + mm_populate(0, TASK_SIZE); out: return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index 09da0b264982..9b12e3047a86 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint) unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long pgoff) + unsigned long flags, unsigned long pgoff, + bool *populate) { struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; + *populate = false; + /* * Does the application expect PROT_READ to imply PROT_EXEC? * @@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - return mmap_region(file, addr, len, flags, vm_flags, pgoff); + addr = mmap_region(file, addr, len, flags, vm_flags, pgoff); + if (!IS_ERR_VALUE(addr) && + ((vm_flags & VM_LOCKED) || + (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) + *populate = true; + return addr; } SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, @@ -1531,10 +1539,12 @@ out: vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - if (!mlock_vma_pages_range(vma, addr, addr + len)) + if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || + vma == get_gate_vma(current->mm))) mm->locked_vm += (len >> PAGE_SHIFT); - } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) - make_pages_present(addr, addr + len); + else + vma->vm_flags &= ~VM_LOCKED; + } if (file) uprobe_mmap(vma); diff --git a/mm/nommu.c b/mm/nommu.c index b20db4e22263..7296a5a280e7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff) + unsigned long pgoff, + bool *populate) { struct vm_area_struct *vma; struct vm_region *region; @@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file, kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); + *populate = false; + /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, diff --git a/mm/util.c b/mm/util.c index c55e26b17d93..13467e043e9e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, { unsigned long ret; struct mm_struct *mm = current->mm; + bool populate; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); + ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, + &populate); up_write(&mm->mmap_sem); + if (!IS_ERR_VALUE(ret) && populate) + mm_populate(ret, len); } return ret; } -- cgit v1.2.3 From c22c0d6344c362b1dde5d8e160d3d07536aca120 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:43 -0800 Subject: mm: remove flags argument to mmap_region After the MAP_POPULATE handling has been moved to mmap_region() call sites, the only remaining use of the flags argument is to pass the MAP_NORESERVE flag. This can be just as easily handled by do_mmap_pgoff(), so do that and remove the mmap_region() flags parameter. [akpm@linux-foundation.org: remove double parens] Signed-off-by: Michel Lespinasse Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/mm/elf.c | 1 - include/linux/mm.h | 3 +-- mm/fremap.c | 3 +-- mm/mmap.c | 33 ++++++++++++++++----------------- 4 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 3cfa98bf9125..743c951c61b0 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -130,7 +130,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (!retval) { unsigned long addr = MEM_USER_INTRPT; addr = mmap_region(NULL, addr, INTRPT_SIZE, - MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); if (addr > (unsigned long) -PAGE_SIZE) diff --git a/include/linux/mm.h b/include/linux/mm.h index da0a0fe970c2..8332f3069fe3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1472,8 +1472,7 @@ extern int install_special_mapping(struct mm_struct *mm, extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff); + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, bool *populate); diff --git a/mm/fremap.c b/mm/fremap.c index b42e32171530..503a72387087 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -204,9 +204,8 @@ get_write_lock: unsigned long addr; struct file *file = get_file(vma->vm_file); - flags = (flags & MAP_NONBLOCK) | MAP_POPULATE; addr = mmap_region(file, start, size, - flags, vma->vm_flags, pgoff); + vma->vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; diff --git a/mm/mmap.c b/mm/mmap.c index a23e30f9719c..d6bc39e939ab 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1291,7 +1291,21 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - addr = mmap_region(file, addr, len, flags, vm_flags, pgoff); + /* + * Set 'VM_NORESERVE' if we should not account for the + * memory use of this mapping. + */ + if (flags & MAP_NORESERVE) { + /* We honor MAP_NORESERVE if allowed to overcommit */ + if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) + vm_flags |= VM_NORESERVE; + + /* hugetlb applies strict overcommit unless MAP_NORESERVE */ + if (file && is_file_hugepages(file)) + vm_flags |= VM_NORESERVE; + } + + addr = mmap_region(file, addr, len, vm_flags, pgoff); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) @@ -1411,8 +1425,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) } unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff) + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; @@ -1435,20 +1448,6 @@ munmap_back: if (!may_expand_vm(mm, len >> PAGE_SHIFT)) return -ENOMEM; - /* - * Set 'VM_NORESERVE' if we should not account for the - * memory use of this mapping. - */ - if ((flags & MAP_NORESERVE)) { - /* We honor MAP_NORESERVE if allowed to overcommit */ - if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) - vm_flags |= VM_NORESERVE; - - /* hugetlb applies strict overcommit unless MAP_NORESERVE */ - if (file && is_file_hugepages(file)) - vm_flags |= VM_NORESERVE; - } - /* * Private writable mapping: check memory availability */ -- cgit v1.2.3 From cea10a19b7972a1954c4a2d05a7de8db48b444fb Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:44 -0800 Subject: mm: directly use __mlock_vma_pages_range() in find_extend_vma() In find_extend_vma(), we don't need mlock_vma_pages_range() to verify the vma type - we know we're working with a stack. So, we can call directly into __mlock_vma_pages_range(), and remove the last make_pages_present() call site. Note that we don't use mm_populate() here, so we can't release the mmap_sem while allocating new stack pages. This is deemed acceptable, because the stack vmas grow by a bounded number of pages at a time, and these are anon pages so we don't have to read from disk to populate them. Signed-off-by: Michel Lespinasse Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - mm/internal.h | 4 ++-- mm/memory.c | 24 ----------------------- mm/mlock.c | 57 +++--------------------------------------------------- mm/mmap.c | 10 ++++------ 5 files changed, 9 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8332f3069fe3..0c34d3486816 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1035,7 +1035,6 @@ static inline int fixup_user_fault(struct task_struct *tsk, } #endif -extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write); diff --git a/mm/internal.h b/mm/internal.h index 9ba21100ebf3..1c0c4cc0fcf7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -162,8 +162,8 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent); #ifdef CONFIG_MMU -extern long mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end); +extern long __mlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, int *nonblocking); extern void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); static inline void munlock_vma_pages_all(struct vm_area_struct *vma) diff --git a/mm/memory.c b/mm/memory.c index 0abd07097ec6..7837ceacf090 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3824,30 +3824,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ -int make_pages_present(unsigned long addr, unsigned long end) -{ - int ret, len, write; - struct vm_area_struct * vma; - - vma = find_vma(current->mm, addr); - if (!vma) - return -ENOMEM; - /* - * We want to touch writable mappings with a write fault in order - * to break COW, except for shared mappings because these don't COW - * and we would not want to dirty them for nothing. - */ - write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE; - BUG_ON(addr >= end); - BUG_ON(end > vma->vm_end); - len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; - ret = get_user_pages(current, current->mm, addr, - len, write, 0, NULL, NULL); - if (ret < 0) - return ret; - return ret == len ? 0 : -EFAULT; -} - #if !defined(__HAVE_ARCH_GATE_AREA) #if defined(AT_SYSINFO_EHDR) diff --git a/mm/mlock.c b/mm/mlock.c index a296a49865df..569400a5d079 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -155,9 +155,8 @@ void munlock_vma_page(struct page *page) * * vma->vm_mm->mmap_sem must be held for at least read. */ -static long __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - int *nonblocking) +long __mlock_vma_pages_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, int *nonblocking) { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; @@ -202,56 +201,6 @@ static int __mlock_posix_error_return(long retval) return retval; } -/** - * mlock_vma_pages_range() - mlock pages in specified vma range. - * @vma - the vma containing the specfied address range - * @start - starting address in @vma to mlock - * @end - end address [+1] in @vma to mlock - * - * For mmap()/mremap()/expansion of mlocked vma. - * - * return 0 on success for "normal" vmas. - * - * return number of pages [> 0] to be removed from locked_vm on success - * of "special" vmas. - */ -long mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - int nr_pages = (end - start) / PAGE_SIZE; - BUG_ON(!(vma->vm_flags & VM_LOCKED)); - - /* - * filter unlockable vmas - */ - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - goto no_mlock; - - if (!((vma->vm_flags & VM_DONTEXPAND) || - is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current->mm))) { - - __mlock_vma_pages_range(vma, start, end, NULL); - - /* Hide errors from mmap() and other callers */ - return 0; - } - - /* - * User mapped kernel pages or huge pages: - * make these pages present to populate the ptes, but - * fall thru' to reset VM_LOCKED--no need to unlock, and - * return nr_pages so these don't get counted against task's - * locked limit. huge pages are already counted against - * locked vm limit. - */ - make_pages_present(start, end); - -no_mlock: - vma->vm_flags &= ~VM_LOCKED; /* and don't come back! */ - return nr_pages; /* error or pages NOT mlocked */ -} - /* * munlock_vma_pages_range() - munlock all pages in the vma range.' * @vma - vma containing range to be munlock()ed. @@ -303,7 +252,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * * Filters out "special" vmas -- VM_LOCKED never gets set for these, and * munlock is a no-op. However, for some special vmas, we go ahead and - * populate the ptes via make_pages_present(). + * populate the ptes. * * For vmas that pass the filters, merge/split as appropriate. */ diff --git a/mm/mmap.c b/mm/mmap.c index d6bc39e939ab..8826c77513a9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2204,9 +2204,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!prev || expand_stack(prev, addr)) return NULL; - if (prev->vm_flags & VM_LOCKED) { - mlock_vma_pages_range(prev, addr, prev->vm_end); - } + if (prev->vm_flags & VM_LOCKED) + __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL); return prev; } #else @@ -2232,9 +2231,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr) start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; - if (vma->vm_flags & VM_LOCKED) { - mlock_vma_pages_range(vma, addr, start); - } + if (vma->vm_flags & VM_LOCKED) + __mlock_vma_pages_range(vma, addr, start, NULL); return vma; } #endif -- cgit v1.2.3 From 1869305009857cdeaabe6283bcdc2359c5784543 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:46 -0800 Subject: mm: introduce VM_POPULATE flag to better deal with racy userspace programs The vm_populate() code populates user mappings without constantly holding the mmap_sem. This makes it susceptible to racy userspace programs: the user mappings may change while vm_populate() is running, and in this case vm_populate() may end up populating the new mapping instead of the old one. In order to reduce the possibility of userspace getting surprised by this behavior, this change introduces the VM_POPULATE vma flag which gets set on vmas we want vm_populate() to work on. This way vm_populate() may still end up populating the new mapping after such a race, but only if the new mapping is also one that the user has requested (using MAP_SHARED, MAP_LOCKED or mlock) to be populated. Signed-off-by: Michel Lespinasse Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + include/linux/mman.h | 4 +++- mm/fremap.c | 12 ++++++++++-- mm/mlock.c | 19 ++++++++++--------- mm/mmap.c | 4 +--- 5 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0c34d3486816..9a5fcdeaa3a0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -87,6 +87,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ +#define VM_POPULATE 0x00001000 #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ diff --git a/include/linux/mman.h b/include/linux/mman.h index 9aa863da287f..61c7a87e5d2b 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -79,6 +79,8 @@ calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | - _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); + ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) | + (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ? + VM_POPULATE : 0); } #endif /* _LINUX_MMAN_H */ diff --git a/mm/fremap.c b/mm/fremap.c index 503a72387087..0cd4c11488ed 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -204,8 +204,10 @@ get_write_lock: unsigned long addr; struct file *file = get_file(vma->vm_file); - addr = mmap_region(file, start, size, - vma->vm_flags, pgoff); + vm_flags = vma->vm_flags; + if (!(flags & MAP_NONBLOCK)) + vm_flags |= VM_POPULATE; + addr = mmap_region(file, start, size, vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -224,6 +226,12 @@ get_write_lock: mutex_unlock(&mapping->i_mmap_mutex); } + if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) { + if (!has_write_lock) + goto get_write_lock; + vma->vm_flags |= VM_POPULATE; + } + if (vma->vm_flags & VM_LOCKED) { /* * drop PG_Mlocked flag for over-mapped range diff --git a/mm/mlock.c b/mm/mlock.c index 569400a5d079..d6378feb2950 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -340,9 +340,9 @@ static int do_mlock(unsigned long start, size_t len, int on) /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ - newflags = vma->vm_flags | VM_LOCKED; - if (!on) - newflags &= ~VM_LOCKED; + newflags = vma->vm_flags & ~VM_LOCKED; + if (on) + newflags |= VM_LOCKED | VM_POPULATE; tmp = vma->vm_end; if (tmp > end) @@ -402,7 +402,8 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) != + VM_POPULATE) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; @@ -475,18 +476,18 @@ static int do_mlockall(int flags) struct vm_area_struct * vma, * prev = NULL; if (flags & MCL_FUTURE) - current->mm->def_flags |= VM_LOCKED; + current->mm->def_flags |= VM_LOCKED | VM_POPULATE; else - current->mm->def_flags &= ~VM_LOCKED; + current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE); if (flags == MCL_FUTURE) goto out; for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { vm_flags_t newflags; - newflags = vma->vm_flags | VM_LOCKED; - if (!(flags & MCL_CURRENT)) - newflags &= ~VM_LOCKED; + newflags = vma->vm_flags & ~VM_LOCKED; + if (flags & MCL_CURRENT) + newflags |= VM_LOCKED | VM_POPULATE; /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); diff --git a/mm/mmap.c b/mm/mmap.c index 8826c77513a9..39a3944e1658 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1306,9 +1306,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } addr = mmap_region(file, addr, len, vm_flags, pgoff); - if (!IS_ERR_VALUE(addr) && - ((vm_flags & VM_LOCKED) || - (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) + if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE)) *populate = true; return addr; } -- cgit v1.2.3 From 41badc15cbad0350de34408c1b0c690f9df76d4b Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:32:47 -0800 Subject: mm: make do_mmap_pgoff return populate as a size in bytes, not as a bool do_mmap_pgoff() rounds up the desired size to the next PAGE_SIZE multiple, however there was no equivalent code in mm_populate(), which caused issues. This could be fixed by introduced the same rounding in mm_populate(), however I think it's preferable to make do_mmap_pgoff() return populate as a size rather than as a boolean, so we don't have to duplicate the size rounding logic in mm_populate(). Signed-off-by: Michel Lespinasse Acked-by: Rik van Riel Tested-by: Andy Lutomirski Cc: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 5 ++--- include/linux/mm.h | 2 +- ipc/shm.c | 4 ++-- mm/mmap.c | 6 +++--- mm/nommu.c | 4 ++-- mm/util.c | 6 +++--- 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 82eec7c7b4bb..064bfbe37566 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -101,9 +101,8 @@ static int aio_setup_ring(struct kioctx *ctx) struct aio_ring *ring; struct aio_ring_info *info = &ctx->ring_info; unsigned nr_events = ctx->max_reqs; - unsigned long size; + unsigned long size, populate; int nr_pages; - bool populate; /* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ @@ -150,7 +149,7 @@ static int aio_setup_ring(struct kioctx *ctx) return -EAGAIN; } if (populate) - mm_populate(info->mmap_base, info->mmap_size); + mm_populate(info->mmap_base, populate); ctx->user_id = info->mmap_base; diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a5fcdeaa3a0..95db68e34b18 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1475,7 +1475,7 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, bool *populate); + unsigned long pgoff, unsigned long *populate); extern int do_munmap(struct mm_struct *, unsigned long, size_t); #ifdef CONFIG_MMU diff --git a/ipc/shm.c b/ipc/shm.c index 9f047ba69e62..be3ec9ae454e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -971,7 +971,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, struct shm_file_data *sfd; struct path path; fmode_t f_mode; - bool populate = false; + unsigned long populate = 0; err = -EINVAL; if (shmid < 0) @@ -1078,7 +1078,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, invalid: up_write(¤t->mm->mmap_sem); if (populate) - mm_populate(addr, size); + mm_populate(addr, populate); out_fput: fput(file); diff --git a/mm/mmap.c b/mm/mmap.c index 39a3944e1658..44bb4d869884 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1163,13 +1163,13 @@ static inline unsigned long round_hint_to_min(unsigned long hint) unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, - bool *populate) + unsigned long *populate) { struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; - *populate = false; + *populate = 0; /* * Does the application expect PROT_READ to imply PROT_EXEC? @@ -1307,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, addr = mmap_region(file, addr, len, vm_flags, pgoff); if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE)) - *populate = true; + *populate = len; return addr; } diff --git a/mm/nommu.c b/mm/nommu.c index 7296a5a280e7..18c1b932e2c4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1251,7 +1251,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long prot, unsigned long flags, unsigned long pgoff, - bool *populate) + unsigned long *populate) { struct vm_area_struct *vma; struct vm_region *region; @@ -1261,7 +1261,7 @@ unsigned long do_mmap_pgoff(struct file *file, kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); - *populate = false; + *populate = 0; /* decide whether we should attempt the mapping, and if so what sort of * mapping */ diff --git a/mm/util.c b/mm/util.c index 13467e043e9e..3704bf1bef94 100644 --- a/mm/util.c +++ b/mm/util.c @@ -355,7 +355,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, { unsigned long ret; struct mm_struct *mm = current->mm; - bool populate; + unsigned long populate; ret = security_mmap_file(file, prot, flag); if (!ret) { @@ -363,8 +363,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, &populate); up_write(&mm->mmap_sem); - if (!IS_ERR_VALUE(ret) && populate) - mm_populate(ret, len); + if (populate) + mm_populate(ret, populate); } return ret; } -- cgit v1.2.3 From 6677e3eaf4d78abd7b09133414c05dc3ec353e7f Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:32:52 -0800 Subject: memory-hotplug: check whether all memory blocks are offlined or not when removing memory We remove the memory like this: 1. lock memory hotplug 2. offline a memory block 3. unlock memory hotplug 4. repeat 1-3 to offline all memory blocks 5. lock memory hotplug 6. remove memory(TODO) 7. unlock memory hotplug All memory blocks must be offlined before removing memory. But we don't hold the lock in the whole operation. So we should check whether all memory blocks are offlined before step6. Otherwise, kernel maybe panicked. Offlining a memory block and removing a memory device can be two different operations. Users can just offline some memory blocks without removing the memory device. For this purpose, the kernel has held lock_memory_hotplug() in __offline_pages(). To reuse the code for memory hot-remove, we repeat step 1-3 to offline all the memory blocks, repeatedly lock and unlock memory hotplug, but not hold the memory hotplug lock in the whole operation. Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 6 ++++++ include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 48 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 83d0b17ba1c2..a51007b79032 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -693,6 +693,12 @@ int offline_memory_block(struct memory_block *mem) return ret; } +/* return true if the memory block is offlined, otherwise, return false */ +bool is_memblock_offlined(struct memory_block *mem) +{ + return mem->state == MEM_OFFLINE; +} + /* * Initialize the sysfs support for memory devices... */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4a45c4e50025..8dd0950a6a7a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -247,6 +247,7 @@ extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern int offline_memory_block(struct memory_block *mem); +extern bool is_memblock_offlined(struct memory_block *mem); extern int remove_memory(u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6a82972aeae5..5d350f5c68e5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1429,6 +1429,54 @@ repeat: goto repeat; } + lock_memory_hotplug(); + + /* + * we have offlined all memory blocks like this: + * 1. lock memory hotplug + * 2. offline a memory block + * 3. unlock memory hotplug + * + * repeat step1-3 to offline the memory block. All memory blocks + * must be offlined before removing memory. But we don't hold the + * lock in the whole operation. So we should check whether all + * memory blocks are offlined. + */ + + mem = NULL; + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + section_nr = pfn_to_section_nr(pfn); + if (!present_section_nr(section_nr)) + continue; + + section = __nr_to_section(section_nr); + /* same memblock? */ + if (mem) + if ((section_nr >= mem->start_section_nr) && + (section_nr <= mem->end_section_nr)) + continue; + + mem = find_memory_block_hinted(section, mem); + if (!mem) + continue; + + ret = is_memblock_offlined(mem); + if (!ret) { + pr_warn("removing memory fails, because memory " + "[%#010llx-%#010llx] is onlined\n", + PFN_PHYS(section_nr_to_pfn(mem->start_section_nr)), + PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1)) - 1); + + kobject_put(&mem->dev.kobj); + unlock_memory_hotplug(); + return ret; + } + } + + if (mem) + kobject_put(&mem->dev.kobj); + unlock_memory_hotplug(); + return 0; } #else -- cgit v1.2.3 From 46c66c4b7ba0f9bb3e2ae3a3cfd40cd3472c8f80 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:32:56 -0800 Subject: memory-hotplug: remove /sys/firmware/memmap/X sysfs When (hot)adding memory into system, /sys/firmware/memmap/X/{end, start, type} sysfs files are created. But there is no code to remove these files. This patch implements the function to remove them. We cannot free firmware_map_entry which is allocated by bootmem because there is no way to do so when the system is up. But we can at least remember the address of that memory and reuse the storage when the memory is added next time. This patch also introduces a new list map_entries_bootmem to link the map entries allocated by bootmem when they are removed, and a lock to protect it. And these entries will be reused when the memory is hot-added again. The idea is suggestted by Andrew Morton. NOTE: It is unsafe to return an entry pointer and release the map_entries_lock. So we should not hold the map_entries_lock separately in firmware_map_find_entry() and firmware_map_remove_entry(). Hold the map_entries_lock across find and remove /sys/firmware/memmap/X operation. And also, users of these two functions need to be careful to hold the lock when using these two functions. [tangchen@cn.fujitsu.com: Hold spinlock across find|remove /sys operation] [tangchen@cn.fujitsu.com: fix the wrong comments of map_entries] [tangchen@cn.fujitsu.com: reuse the storage of /sys/firmware/memmap/X/ allocated by bootmem] [tangchen@cn.fujitsu.com: fix section mismatch problem] [tangchen@cn.fujitsu.com: fix the doc format in drivers/firmware/memmap.c] Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Reviewed-by: Kamezawa Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Lai Jiangshan Cc: Tang Chen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Julian Calaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 196 ++++++++++++++++++++++++++++++++++++++++--- include/linux/firmware-map.h | 6 ++ mm/memory_hotplug.c | 5 +- 3 files changed, 193 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 90723e65b081..0b5b5f619c75 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Data types ------------------------------------------------------------------ @@ -52,6 +53,9 @@ static ssize_t start_show(struct firmware_map_entry *entry, char *buf); static ssize_t end_show(struct firmware_map_entry *entry, char *buf); static ssize_t type_show(struct firmware_map_entry *entry, char *buf); +static struct firmware_map_entry * __meminit +firmware_map_find_entry(u64 start, u64 end, const char *type); + /* * Static data ----------------------------------------------------------------- */ @@ -79,7 +83,52 @@ static const struct sysfs_ops memmap_attr_ops = { .show = memmap_attr_show, }; -static struct kobj_type memmap_ktype = { +/* Firmware memory map entries. */ +static LIST_HEAD(map_entries); +static DEFINE_SPINLOCK(map_entries_lock); + +/* + * For memory hotplug, there is no way to free memory map entries allocated + * by boot mem after the system is up. So when we hot-remove memory whose + * map entry is allocated by bootmem, we need to remember the storage and + * reuse it when the memory is hot-added again. + */ +static LIST_HEAD(map_entries_bootmem); +static DEFINE_SPINLOCK(map_entries_bootmem_lock); + + +static inline struct firmware_map_entry * +to_memmap_entry(struct kobject *kobj) +{ + return container_of(kobj, struct firmware_map_entry, kobj); +} + +static void __meminit release_firmware_map_entry(struct kobject *kobj) +{ + struct firmware_map_entry *entry = to_memmap_entry(kobj); + + if (PageReserved(virt_to_page(entry))) { + /* + * Remember the storage allocated by bootmem, and reuse it when + * the memory is hot-added again. The entry will be added to + * map_entries_bootmem here, and deleted from &map_entries in + * firmware_map_remove_entry(). + */ + if (firmware_map_find_entry(entry->start, entry->end, + entry->type)) { + spin_lock(&map_entries_bootmem_lock); + list_add(&entry->list, &map_entries_bootmem); + spin_unlock(&map_entries_bootmem_lock); + } + + return; + } + + kfree(entry); +} + +static struct kobj_type __refdata memmap_ktype = { + .release = release_firmware_map_entry, .sysfs_ops = &memmap_attr_ops, .default_attrs = def_attrs, }; @@ -88,13 +137,6 @@ static struct kobj_type memmap_ktype = { * Registration functions ------------------------------------------------------ */ -/* - * Firmware memory map entries. No locking is needed because the - * firmware_map_add() and firmware_map_add_early() functions are called - * in firmware initialisation code in one single thread of execution. - */ -static LIST_HEAD(map_entries); - /** * firmware_map_add_entry() - Does the real work to add a firmware memmap entry. * @start: Start of the memory range. @@ -118,11 +160,25 @@ static int firmware_map_add_entry(u64 start, u64 end, INIT_LIST_HEAD(&entry->list); kobject_init(&entry->kobj, &memmap_ktype); + spin_lock(&map_entries_lock); list_add_tail(&entry->list, &map_entries); + spin_unlock(&map_entries_lock); return 0; } +/** + * firmware_map_remove_entry() - Does the real work to remove a firmware + * memmap entry. + * @entry: removed entry. + * + * The caller must hold map_entries_lock, and release it properly. + **/ +static inline void firmware_map_remove_entry(struct firmware_map_entry *entry) +{ + list_del(&entry->list); +} + /* * Add memmap entry on sysfs */ @@ -144,6 +200,78 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry) return 0; } +/* + * Remove memmap entry on sysfs + */ +static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry) +{ + kobject_put(&entry->kobj); +} + +/* + * firmware_map_find_entry_in_list() - Search memmap entry in a given list. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * @list: In which to find the entry. + * + * This function is to find the memmap entey of a given memory range in a + * given list. The caller must hold map_entries_lock, and must not release + * the lock until the processing of the returned entry has completed. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry_in_list(u64 start, u64 end, const char *type, + struct list_head *list) +{ + struct firmware_map_entry *entry; + + list_for_each_entry(entry, list, list) + if ((entry->start == start) && (entry->end == end) && + (!strcmp(entry->type, type))) { + return entry; + } + + return NULL; +} + +/* + * firmware_map_find_entry() - Search memmap entry in map_entries. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * + * This function is to find the memmap entey of a given memory range. + * The caller must hold map_entries_lock, and must not release the lock + * until the processing of the returned entry has completed. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry(u64 start, u64 end, const char *type) +{ + return firmware_map_find_entry_in_list(start, end, type, &map_entries); +} + +/* + * firmware_map_find_entry_bootmem() - Search memmap entry in map_entries_bootmem. + * @start: Start of the memory range. + * @end: End of the memory range (exclusive). + * @type: Type of the memory range. + * + * This function is similar to firmware_map_find_entry except that it find the + * given entry in map_entries_bootmem. + * + * Return: Pointer to the entry to be found on success, or NULL on failure. + */ +static struct firmware_map_entry * __meminit +firmware_map_find_entry_bootmem(u64 start, u64 end, const char *type) +{ + return firmware_map_find_entry_in_list(start, end, type, + &map_entries_bootmem); +} + /** * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do * memory hotplug. @@ -161,9 +289,19 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; + entry = firmware_map_find_entry_bootmem(start, end, type); + if (!entry) { + entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + } else { + /* Reuse storage allocated by bootmem. */ + spin_lock(&map_entries_bootmem_lock); + list_del(&entry->list); + spin_unlock(&map_entries_bootmem_lock); + + memset(entry, 0, sizeof(*entry)); + } firmware_map_add_entry(start, end, type, entry); /* create the memmap entry */ @@ -196,6 +334,36 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) return firmware_map_add_entry(start, end, type, entry); } +/** + * firmware_map_remove() - remove a firmware mapping entry + * @start: Start of the memory range. + * @end: End of the memory range. + * @type: Type of the memory range. + * + * removes a firmware mapping entry. + * + * Returns 0 on success, or -EINVAL if no entry. + **/ +int __meminit firmware_map_remove(u64 start, u64 end, const char *type) +{ + struct firmware_map_entry *entry; + + spin_lock(&map_entries_lock); + entry = firmware_map_find_entry(start, end - 1, type); + if (!entry) { + spin_unlock(&map_entries_lock); + return -EINVAL; + } + + firmware_map_remove_entry(entry); + spin_unlock(&map_entries_lock); + + /* remove the memmap entry */ + remove_sysfs_fw_map_entry(entry); + + return 0; +} + /* * Sysfs functions ------------------------------------------------------------- */ @@ -217,8 +385,10 @@ static ssize_t type_show(struct firmware_map_entry *entry, char *buf) return snprintf(buf, PAGE_SIZE, "%s\n", entry->type); } -#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr) -#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj) +static inline struct memmap_attribute *to_memmap_attr(struct attribute *attr) +{ + return container_of(attr, struct memmap_attribute, attr); +} static ssize_t memmap_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h index 43fe52fcef0f..71d4fa721db9 100644 --- a/include/linux/firmware-map.h +++ b/include/linux/firmware-map.h @@ -25,6 +25,7 @@ int firmware_map_add_early(u64 start, u64 end, const char *type); int firmware_map_add_hotplug(u64 start, u64 end, const char *type); +int firmware_map_remove(u64 start, u64 end, const char *type); #else /* CONFIG_FIRMWARE_MEMMAP */ @@ -38,6 +39,11 @@ static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type) return 0; } +static inline int firmware_map_remove(u64 start, u64 end, const char *type) +{ + return 0; +} + #endif /* CONFIG_FIRMWARE_MEMMAP */ #endif /* _LINUX_FIRMWARE_MAP_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e5b91b12cec3..a776dbf3fa00 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1460,7 +1460,7 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) return ret; } -int remove_memory(u64 start, u64 size) +int __ref remove_memory(u64 start, u64 size) { unsigned long start_pfn, end_pfn; int ret = 0; @@ -1510,6 +1510,9 @@ repeat: return ret; } + /* remove memmap entry */ + firmware_map_remove(start, start + size, "System RAM"); + unlock_memory_hotplug(); return 0; -- cgit v1.2.3 From 24d335ca3606b610ec69c66a1e42760c96d89470 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:32:58 -0800 Subject: memory-hotplug: introduce new arch_remove_memory() for removing page table For removing memory, we need to remove page tables. But it depends on architecture. So the patch introduce arch_remove_memory() for removing page table. Now it only calls __remove_pages(). Note: __remove_pages() for some archtecuture is not implemented (I don't know how to implement it for s390). Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 18 ++++++++++++++++++ arch/powerpc/mm/mem.c | 12 ++++++++++++ arch/s390/mm/init.c | 12 ++++++++++++ arch/sh/mm/init.c | 17 +++++++++++++++++ arch/tile/mm/init.c | 8 ++++++++ arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 15 +++++++++++++++ include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 2 ++ 9 files changed, 97 insertions(+) (limited to 'include/linux') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b755ea92aea7..20bc967c7209 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -688,6 +688,24 @@ int arch_add_memory(int nid, u64 start, u64 size) return ret; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (ret) + pr_warn("%s: Problem encountered in __remove_pages() as" + " ret=%d\n", __func__, ret); + + return ret; +} +#endif #endif /* diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0dba5066c22a..09c64518b4d0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -133,6 +133,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(nid, zone, start_pfn, nr_pages); } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + return __remove_pages(zone, start_pfn, nr_pages); +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ae672f41c464..49ce6bb2c641 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -228,4 +228,16 @@ int arch_add_memory(int nid, u64 start, u64 size) vmem_remove_mapping(start, size); return rc; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* + * There is no hardware or firmware interface which could trigger a + * hot memory remove on s390. So there is nothing that needs to be + * implemented. + */ + return -EBUSY; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 82cc576fab15..105794037143 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -558,4 +558,21 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (unlikely(ret)) + pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, + ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index ef29d6c5e10e..2749515a0547 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -935,6 +935,14 @@ int remove_memory(u64 start, u64 size) { return -EINVAL; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* TODO */ + return -EBUSY; +} +#endif #endif struct kmem_cache *pgd_cache; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b299724f6e34..2d19001151d5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(nid, zone, start_pfn, nr_pages); } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + return __remove_pages(zone, start_pfn, nr_pages); +} +#endif #endif /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3eba7f429880..b6dd1c480b30 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -707,6 +707,21 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); +#ifdef CONFIG_MEMORY_HOTREMOVE +int __ref arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + WARN_ON_ONCE(ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8dd0950a6a7a..31a563bbd936 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -96,6 +96,7 @@ extern void __online_page_free(struct page *page); #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); +extern int arch_remove_memory(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages in a zone */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a776dbf3fa00..942b43f6d736 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1513,6 +1513,8 @@ repeat: /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); + arch_remove_memory(start, size); + unlock_memory_hotplug(); return 0; -- cgit v1.2.3 From 46723bfa540f0a1e494476a1734d03626a0bd1e0 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:33:00 -0800 Subject: memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap For removing memmap region of sparse-vmemmap which is allocated bootmem, memmap region of sparse-vmemmap needs to be registered by get_page_bootmem(). So the patch searches pages of virtual mapping and registers the pages by get_page_bootmem(). NOTE: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390, and sparc. So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node() when platform doesn't support it. It's implemented by adding a new Kconfig option named CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected by memory-hotplug feature fully supported archs(currently only on x86_64). Since we have 2 config options called MEMORY_HOTPLUG and MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately, and codes in function register_page_bootmem_info_node() are only used for collecting infomation for hot-remove, so reside it under MEMORY_HOTREMOVE. Besides page_isolation.c selected by MEMORY_ISOLATION under MEMORY_HOTPLUG is also such case, move it too. [mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE] [linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()] [mhocko@suse.cz: remove the arch specific functions without any implementation] [linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed] [rientjes@google.com: fix defined but not used warning] Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Reviewed-by: Wu Jianguo Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Michal Hocko Signed-off-by: Lin Feng Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 1 + arch/powerpc/mm/init_64.c | 1 + arch/sparc/mm/init_64.c | 1 + arch/x86/mm/init_64.c | 60 ++++++++++++++++++++++++++++++++++++++++++ include/linux/memory_hotplug.h | 13 +++++---- include/linux/mm.h | 3 ++- mm/Kconfig | 10 ++++++- mm/memory_hotplug.c | 35 +++++++++++++++++++++--- 8 files changed, 111 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c641333cd997..731bf84094b6 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page, { return vmemmap_populate_basepages(start_page, size, node); } + #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 95a45293e5ac..42bf082f0124 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5c2c6e61facb..59c6fcfdc782 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void) node_start = 0; } } + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b6dd1c480b30..f17aa76dc1ae 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) return 0; } +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) +void register_page_bootmem_memmap(unsigned long section_nr, + struct page *start_page, unsigned long size) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned int nr_pages; + struct page *page; + + for (; addr < end; addr = next) { + pte_t *pte = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + get_page_bootmem(section_nr, pmd_page(*pmd), + MIX_SECTION_INFO); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + continue; + get_page_bootmem(section_nr, pte_page(*pte), + SECTION_INFO); + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + nr_pages = 1 << (get_order(PMD_SIZE)); + page = pmd_page(*pmd); + while (nr_pages--) + get_page_bootmem(section_nr, page++, + SECTION_INFO); + } + } +} +#endif + void __meminit vmemmap_populate_print_last(void) { if (p_start) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31a563bbd936..4d523fe75ba1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ -#ifdef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE +extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +#else static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } -static inline void put_page_bootmem(struct page *page) -{ -} -#else -extern void register_page_bootmem_info_node(struct pglist_data *pgdat); -extern void put_page_bootmem(struct page *page); #endif +extern void put_page_bootmem(struct page *page); +extern void get_page_bootmem(unsigned long ingo, struct page *page, + unsigned long type); /* * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug diff --git a/include/linux/mm.h b/include/linux/mm.h index 95db68e34b18..060557b9764f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); - +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, + unsigned long size); enum mf_flags { MF_COUNT_INCREASED = 1 << 0, diff --git a/mm/Kconfig b/mm/Kconfig index 0b23db9a8791..2c7aea7106f9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -162,10 +162,16 @@ config MOVABLE_NODE Say Y here if you want to hotplug a whole node. Say N here if you want kernel to use memory on all nodes evenly. +# +# Only be set on architectures that have completely implemented memory hotplug +# feature. If you are not sure, don't touch it. +# +config HAVE_BOOTMEM_INFO_NODE + def_bool n + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" - select MEMORY_ISOLATION depends on SPARSEMEM || X86_64_ACPI_NUMA depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) @@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE config MEMORY_HOTREMOVE bool "Allow for memory hot remove" + select MEMORY_ISOLATION + select HAVE_BOOTMEM_INFO_NODE if X86_64 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 942b43f6d736..6c90d222ec0a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res) } #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE -#ifndef CONFIG_SPARSEMEM_VMEMMAP -static void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type) +void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type) { page->lru.next = (struct list_head *) type; SetPagePrivate(page); @@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page) } +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE +#ifndef CONFIG_SPARSEMEM_VMEMMAP static void register_page_bootmem_info_section(unsigned long start_pfn) { unsigned long *usemap, mapsize, section_nr, i; @@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); } +#else /* CONFIG_SPARSEMEM_VMEMMAP */ +static void register_page_bootmem_info_section(unsigned long start_pfn) +{ + unsigned long *usemap, mapsize, section_nr, i; + struct mem_section *ms; + struct page *page, *memmap; + + if (!pfn_valid(start_pfn)) + return; + + section_nr = pfn_to_section_nr(start_pfn); + ms = __nr_to_section(section_nr); + + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + + register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); + + usemap = __nr_to_section(section_nr)->pageblock_flags; + page = virt_to_page(usemap); + + mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + + for (i = 0; i < mapsize; i++, page++) + get_page_bootmem(section_nr, page, MIX_SECTION_INFO); +} +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ void register_page_bootmem_info_node(struct pglist_data *pgdat) { @@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) register_page_bootmem_info_section(pfn); } } -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static void grow_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) -- cgit v1.2.3 From ae9aae9eda2db71bf4b592f15618b0160eb07731 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:04 -0800 Subject: memory-hotplug: common APIs to support page tables hot-remove When memory is removed, the corresponding pagetables should alse be removed. This patch introduces some common APIs to support vmemmap pagetable and x86_64 architecture direct mapping pagetable removing. All pages of virtual mapping in removed memory cannot be freed if some pages used as PGD/PUD include not only removed memory but also other memory. So this patch uses the following way to check whether a page can be freed or not. 1) When removing memory, the page structs of the removed memory are filled with 0FD. 2) All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared. In this case, the page used as PT/PMD can be freed. For direct mapping pages, update direct_pages_count[level] when we freed their pagetables. And do not free the pages again because they were freed when offlining. For vmemmap pages, free the pages and their pagetables. For larger pages, do not split them into smaller ones because there is no way to know if the larger page has been split. As a result, there is no way to decide when to split. We deal the larger pages in the following way: 1) For direct mapped pages, all the pages were freed when they were offlined. And since menmory offline is done section by section, all the memory ranges being removed are aligned to PAGE_SIZE. So only need to deal with unaligned pages when freeing vmemmap pages. 2) For vmemmap pages being used to store page_struct, if part of the larger page is still in use, just fill the unused part with 0xFD. And when the whole page is fulfilled with 0xFD, then free the larger page. [akpm@linux-foundation.org: fix typo in comment] [tangchen@cn.fujitsu.com: do not calculate direct mapping pages when freeing vmemmap pagetables] [tangchen@cn.fujitsu.com: do not free direct mapping pages twice] [tangchen@cn.fujitsu.com: do not free page split from hugepage one by one] [tangchen@cn.fujitsu.com: do not split pages when freeing pagetable pages] [akpm@linux-foundation.org: use pmd_page_vaddr()] [akpm@linux-foundation.org: fix used-uninitialised bug] Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Jianguo Wu Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/init_64.c | 304 +++++++++++++++++++++++++++++++++++ arch/x86/mm/pageattr.c | 47 +++--- include/linux/bootmem.h | 1 + 4 files changed, 331 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e6423002c10b..567b5d0632b2 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { } * as a pte too. */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase); extern phys_addr_t slow_virt_to_phys(void *__address); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f17aa76dc1ae..ca6cd403a275 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -707,6 +707,310 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); +#define PAGE_INUSE 0xFD + +static void __meminit free_pagetable(struct page *page, int order) +{ + struct zone *zone; + bool bootmem = false; + unsigned long magic; + unsigned int nr_pages = 1 << order; + + /* bootmem page has reserved flag */ + if (PageReserved(page)) { + __ClearPageReserved(page); + bootmem = true; + + magic = (unsigned long)page->lru.next; + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { + while (nr_pages--) + put_page_bootmem(page++); + } else + __free_pages_bootmem(page, order); + } else + free_pages((unsigned long)page_address(page), order); + + /* + * SECTION_INFO pages and MIX_SECTION_INFO pages + * are all allocated by bootmem. + */ + if (bootmem) { + zone = page_zone(page); + zone_span_writelock(zone); + zone->present_pages += nr_pages; + zone_span_writeunlock(zone); + totalram_pages += nr_pages; + } +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (pte_val(*pte)) + return; + } + + /* free a pte talbe */ + free_pagetable(pmd_page(*pmd), 0); + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); +} + +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (pmd_val(*pmd)) + return; + } + + /* free a pmd talbe */ + free_pagetable(pud_page(*pud), 0); + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); +} + +/* Return true if pgd is changed, otherwise return false. */ +static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) +{ + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (pud_val(*pud)) + return false; + } + + /* free a pud table */ + free_pagetable(pgd_page(*pgd), 0); + spin_lock(&init_mm.page_table_lock); + pgd_clear(pgd); + spin_unlock(&init_mm.page_table_lock); + + return true; +} + +static void __meminit +remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pte_t *pte; + void *page_addr; + phys_addr_t phys_addr; + + pte = pte_start + pte_index(addr); + for (; addr < end; addr = next, pte++) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + if (!pte_present(*pte)) + continue; + + /* + * We mapped [0,1G) memory as identity mapping when + * initializing, in arch/x86/kernel/head_64.S. These + * pagetables cannot be removed. + */ + phys_addr = pte_val(*pte) + (addr & PAGE_MASK); + if (phys_addr < (phys_addr_t)0x40000000) + return; + + if (IS_ALIGNED(addr, PAGE_SIZE) && + IS_ALIGNED(next, PAGE_SIZE)) { + /* + * Do not free direct mapping pages since they were + * freed when offlining, or simplely not in use. + */ + if (!direct) + free_pagetable(pte_page(*pte), 0); + + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + + /* For non-direct mapping, pages means nothing. */ + pages++; + } else { + /* + * If we are here, we are freeing vmemmap pages since + * direct mapped memory ranges to be freed are aligned. + * + * If we are not removing the whole page, it means + * other page structs in this page are being used and + * we canot remove them. So fill the unused page_structs + * with 0xFD, and remove the page when it is wholly + * filled with 0xFD. + */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pte_page(*pte)); + if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { + free_pagetable(pte_page(*pte), 0); + + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + } + } + } + + /* Call free_pte_table() in remove_pmd_table(). */ + flush_tlb_all(); + if (direct) + update_page_count(PG_LEVEL_4K, -pages); +} + +static void __meminit +remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pte_t *pte_base; + pmd_t *pmd; + void *page_addr; + + pmd = pmd_start + pmd_index(addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + if (pmd_large(*pmd)) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + if (!direct) + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + pages++; + } else { + /* If here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pmd_page(*pmd)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PMD_SIZE)) { + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + } + } + + continue; + } + + pte_base = (pte_t *)pmd_page_vaddr(*pmd); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); + } + + /* Call free_pmd_table() in remove_pud_table(). */ + if (direct) + update_page_count(PG_LEVEL_2M, -pages); +} + +static void __meminit +remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pmd_t *pmd_base; + pud_t *pud; + void *page_addr; + + pud = pud_start + pud_index(addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!pud_present(*pud)) + continue; + + if (pud_large(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + if (!direct) + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + pages++; + } else { + /* If here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pud_page(*pud)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PUD_SIZE)) { + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + } + } + + continue; + } + + pmd_base = (pmd_t *)pud_page_vaddr(*pud); + remove_pmd_table(pmd_base, addr, next, direct); + free_pmd_table(pmd_base, pud); + } + + if (direct) + update_page_count(PG_LEVEL_1G, -pages); +} + +/* start and end are both virtual address. */ +static void __meminit +remove_pagetable(unsigned long start, unsigned long end, bool direct) +{ + unsigned long next; + pgd_t *pgd; + pud_t *pud; + bool pgd_changed = false; + + for (; start < end; start = next) { + next = pgd_addr_end(start, end); + + pgd = pgd_offset_k(start); + if (!pgd_present(*pgd)) + continue; + + pud = (pud_t *)pgd_page_vaddr(*pgd); + remove_pud_table(pud, start, next, direct); + if (free_pud_table(pud, pgd)) + pgd_changed = true; + } + + if (pgd_changed) + sync_global_pgds(start, end - 1); + + flush_tlb_all(); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int __ref arch_remove_memory(u64 start, u64 size) { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a1b1c88f9caf..ca1f1c2bb7be 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -529,21 +529,13 @@ out_unlock: return do_split; } -static int split_large_page(pte_t *kpte, unsigned long address) +int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase) { unsigned long pfn, pfninc = 1; unsigned int i, level; - pte_t *pbase, *tmp; + pte_t *tmp; pgprot_t ref_prot; - struct page *base; - - if (!debug_pagealloc) - spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); - if (!debug_pagealloc) - spin_lock(&cpa_lock); - if (!base) - return -ENOMEM; + struct page *base = virt_to_page(pbase); spin_lock(&pgd_lock); /* @@ -551,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address) * up for us already: */ tmp = lookup_address(address, &level); - if (tmp != kpte) - goto out_unlock; + if (tmp != kpte) { + spin_unlock(&pgd_lock); + return 1; + } - pbase = (pte_t *)page_address(base); paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); /* @@ -601,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address) * going on. */ __flush_tlb_all(); + spin_unlock(&pgd_lock); - base = NULL; + return 0; +} -out_unlock: - /* - * If we dropped out via the lookup_address check under - * pgd_lock then stick the page back into the pool: - */ - if (base) +static int split_large_page(pte_t *kpte, unsigned long address) +{ + pte_t *pbase; + struct page *base; + + if (!debug_pagealloc) + spin_unlock(&cpa_lock); + base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); + if (!debug_pagealloc) + spin_lock(&cpa_lock); + if (!base) + return -ENOMEM; + + pbase = (pte_t *)page_address(base); + if (__split_large_page(kpte, address, pbase)) __free_page(base); - spin_unlock(&pgd_lock); return 0; } diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3cd16ba82f15..cdc3bab01832 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long size); extern void free_bootmem(unsigned long physaddr, unsigned long size); extern void free_bootmem_late(unsigned long physaddr, unsigned long size); +extern void __free_pages_bootmem(struct page *page, unsigned int order); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, -- cgit v1.2.3 From 0197518cd3672029618a16a57597946a094ac7a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:08 -0800 Subject: memory-hotplug: remove memmap of sparse-vmemmap Introduce a new API vmemmap_free() to free and remove vmemmap pagetables. Since pagetable implements are different, each architecture has to provide its own version of vmemmap_free(), just like vmemmap_populate(). Note: vmemmap_free() is not implemented for ia64, ppc, s390, and sparc. [mhocko@suse.cz: fix implicit declaration of remove_pagetable] Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Jianguo Wu Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 +++ arch/ia64/mm/discontig.c | 3 +++ arch/powerpc/mm/init_64.c | 4 ++++ arch/s390/mm/vmem.c | 4 ++++ arch/sparc/mm/init_64.c | 4 ++++ arch/x86/mm/init_64.c | 8 ++++++++ include/linux/mm.h | 3 +++ mm/sparse.c | 3 ++- 8 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4dd585898c5..224b44ab534e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -434,4 +434,7 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ +void vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ +} #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 731bf84094b6..652fee097921 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -823,4 +823,7 @@ int __meminit vmemmap_populate(struct page *start_page, return vmemmap_populate_basepages(start_page, size, node); } +void vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ +} #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 42bf082f0124..7e2246fb2f31 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -298,5 +298,9 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } +void vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ +} + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 79699f46a443..e21aaf4f5cb6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -268,6 +268,10 @@ out: return ret; } +void vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ +} + /* * Add memory segment to the segment list if it doesn't overlap with * an already present segment. diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 59c6fcfdc782..1588d33d5492 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2236,6 +2236,10 @@ void __meminit vmemmap_populate_print_last(void) } } +void vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ +} + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4e58b83e6b2e..474e28f10815 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1011,6 +1011,14 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) flush_tlb_all(); } +void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + remove_pagetable(start, end, false); +} + static void __meminit kernel_physical_mapping_remove(unsigned long start, unsigned long end) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 060557b9764f..9d659491c0ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1718,6 +1718,9 @@ int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); +#ifdef CONFIG_MEMORY_HOTPLUG +void vmemmap_free(struct page *memmap, unsigned long nr_pages); +#endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); diff --git a/mm/sparse.c b/mm/sparse.c index 66f0fd9d7964..46f6ea47d9ab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -615,10 +615,11 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, } static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_free(memmap, nr_pages); } static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { + vmemmap_free(memmap, nr_pages); } #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) -- cgit v1.2.3 From 60a5a19e7419ba0bc22ed01b3285e8940b42944c Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:14 -0800 Subject: memory-hotplug: remove sysfs file of node Introduce a new function try_offline_node() to remove sysfs file of node when all memory sections of this node are removed. If some memory sections of this node are not removed, this function does nothing. Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/acpi_memhotplug.c | 8 ++++-- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 58 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 034d3e72aa92..da1f82b445e0 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -280,9 +280,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { - int result = 0; + int result = 0, nid; struct acpi_memory_info *info, *n; + nid = acpi_get_node(mem_device->device->handle); + list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (info->failed) /* The kernel does not use this memory block */ @@ -295,7 +297,9 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) */ return -EBUSY; - result = remove_memory(info->start_addr, info->length); + if (nid < 0) + nid = memory_add_physaddr_to_nid(info->start_addr); + result = remove_memory(nid, info->start_addr, info->length); if (result) return result; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4d523fe75ba1..69903ccf549e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -248,7 +248,7 @@ extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern int offline_memory_block(struct memory_block *mem); extern bool is_memblock_offlined(struct memory_block *mem); -extern int remove_memory(u64 start, u64 size); +extern int remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3f792375f326..aea6374f435a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -1679,7 +1680,58 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) return ret; } -int __ref remove_memory(u64 start, u64 size) +static int check_cpu_on_node(void *data) +{ + struct pglist_data *pgdat = data; + int cpu; + + for_each_present_cpu(cpu) { + if (cpu_to_node(cpu) == pgdat->node_id) + /* + * the cpu on this node isn't removed, and we can't + * offline this node. + */ + return -EBUSY; + } + + return 0; +} + +/* offline the node if all memory sections of this node are removed */ +static void try_offline_node(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!present_section_nr(section_nr)) + continue; + + if (pfn_to_nid(pfn) != nid) + continue; + + /* + * some memory sections of this node are not removed, and we + * can't offline node now. + */ + return; + } + + if (stop_machine(check_cpu_on_node, NODE_DATA(nid), NULL)) + return; + + /* + * all memory/cpu of this node are removed, we can offline this + * node now. + */ + node_set_offline(nid); + unregister_one_node(nid); +} + +int __ref remove_memory(int nid, u64 start, u64 size) { unsigned long start_pfn, end_pfn; int ret = 0; @@ -1734,6 +1786,8 @@ repeat: arch_remove_memory(start, size); + try_offline_node(nid); + unlock_memory_hotplug(); return 0; @@ -1743,7 +1797,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) { return -EINVAL; } -int remove_memory(u64 start, u64 size) +int remove_memory(int nid, u64 start, u64 size) { return -EINVAL; } -- cgit v1.2.3 From 90b30cdc1d87450e2ae89c8f8a29102dc2c1992e Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:27 -0800 Subject: memory-hotplug: export the function try_offline_node() try_offline_node() will be needed in the tristate drivers/acpi/processor_driver.c. The node will be offlined when all memory/cpu on the node have been hotremoved. So we need the function try_offline_node() in cpu-hotplug path. If the memory-hotplug is disabled, and cpu-hotplug is enabled 1. no memory no the node we don't online the node, and cpu's node is the nearest node. 2. the node contains some memory the node has been onlined, and cpu's node is still needed to migrate the sleep task on the cpu to the same node. So we do nothing in try_offline_node() in this case. [rientjes@google.com: export the function try_offline_node() fix] Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Len Brown Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 +++ mm/memory_hotplug.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 69903ccf549e..b6a3be7d47bf 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -233,6 +233,7 @@ static inline void unlock_memory_hotplug(void) {} #ifdef CONFIG_MEMORY_HOTREMOVE extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); +extern void try_offline_node(int nid); #else static inline int is_mem_section_removable(unsigned long pfn, @@ -240,6 +241,8 @@ static inline int is_mem_section_removable(unsigned long pfn, { return 0; } + +static inline void try_offline_node(int nid) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern int mem_online_node(int nid); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e189b1f4a9db..17e1447077ab 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1706,7 +1706,7 @@ static int check_cpu_on_node(void *data) } /* offline the node if all memory sections of this node are removed */ -static void try_offline_node(int nid) +void try_offline_node(int nid) { pg_data_t *pgdat = NODE_DATA(nid); unsigned long start_pfn = pgdat->node_start_pfn; @@ -1762,6 +1762,7 @@ static void try_offline_node(int nid) */ memset(pgdat, 0, sizeof(*pgdat)); } +EXPORT_SYMBOL(try_offline_node); int __ref remove_memory(int nid, u64 start, u64 size) { -- cgit v1.2.3 From 34b71f1e04fcba578e719e675b4882eeeb2a1f6f Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:37 -0800 Subject: page_alloc: add movable_memmap kernel parameter Add functions to parse movablemem_map boot option. Since the option could be specified more then once, all the maps will be stored in the global variable movablemem_map.map array. And also, we keep the array in monotonic increasing order by start_pfn. And merge all overlapped ranges. [akpm@linux-foundation.org: improve comment] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: remove unneeded parens] Signed-off-by: Tang Chen Signed-off-by: Lai Jiangshan Reviewed-by: Wen Congyang Tested-by: Lin Feng Cc: Wu Jianguo Cc: Mel Gorman Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 17 +++++ include/linux/mm.h | 11 +++ mm/page_alloc.c | 131 ++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9aa8ff3e54dc..722a74161246 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1640,6 +1640,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that the amount of memory usable for all allocations is not too small. + movablemem_map=nn[KMG]@ss[KMG] + [KNL,X86,IA-64,PPC] This parameter is similar to + memmap except it specifies the memory map of + ZONE_MOVABLE. + If more areas are all within one node, then from + lowest ss to the end of the node will be ZONE_MOVABLE. + If an area covers two or more nodes, the area from + ss to the end of the 1st node will be ZONE_MOVABLE, + and all the rest nodes will only have ZONE_MOVABLE. + If memmap is specified at the same time, the + movablemem_map will be limited within the memmap + areas. If kernelcore or movablecore is also specified, + movablemem_map will have higher priority to be + satisfied. So the administrator should be careful that + the amount of movablemem_map areas are not too large. + Otherwise kernel won't have enough memory to start. + MTD_Partition= [MTD] Format: ,,, diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d659491c0ae..ce9bd3049836 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1359,6 +1359,17 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); +#define MOVABLEMEM_MAP_MAX MAX_NUMNODES +struct movablemem_entry { + unsigned long start_pfn; /* start pfn of memory segment */ + unsigned long end_pfn; /* end pfn of memory segment (exclusive) */ +}; + +struct movablemem_map { + int nr_map; + struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; +}; + #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 703944809666..aa1cc5fe9904 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -202,6 +202,9 @@ static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +/* Movable memory ranges, will also be used by memblock subsystem. */ +struct movablemem_map movablemem_map; + static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; static unsigned long __initdata required_kernelcore; @@ -5078,6 +5081,134 @@ static int __init cmdline_parse_movablecore(char *p) early_param("kernelcore", cmdline_parse_kernelcore); early_param("movablecore", cmdline_parse_movablecore); +/** + * insert_movablemem_map - Insert a memory range in to movablemem_map.map. + * @start_pfn: start pfn of the range + * @end_pfn: end pfn of the range + * + * This function will also merge the overlapped ranges, and sort the array + * by start_pfn in monotonic increasing order. + */ +static void __init insert_movablemem_map(unsigned long start_pfn, + unsigned long end_pfn) +{ + int pos, overlap; + + /* + * pos will be at the 1st overlapped range, or the position + * where the element should be inserted. + */ + for (pos = 0; pos < movablemem_map.nr_map; pos++) + if (start_pfn <= movablemem_map.map[pos].end_pfn) + break; + + /* If there is no overlapped range, just insert the element. */ + if (pos == movablemem_map.nr_map || + end_pfn < movablemem_map.map[pos].start_pfn) { + /* + * If pos is not the end of array, we need to move all + * the rest elements backward. + */ + if (pos < movablemem_map.nr_map) + memmove(&movablemem_map.map[pos+1], + &movablemem_map.map[pos], + sizeof(struct movablemem_entry) * + (movablemem_map.nr_map - pos)); + movablemem_map.map[pos].start_pfn = start_pfn; + movablemem_map.map[pos].end_pfn = end_pfn; + movablemem_map.nr_map++; + return; + } + + /* overlap will be at the last overlapped range */ + for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++) + if (end_pfn < movablemem_map.map[overlap].start_pfn) + break; + + /* + * If there are more ranges overlapped, we need to merge them, + * and move the rest elements forward. + */ + overlap--; + movablemem_map.map[pos].start_pfn = min(start_pfn, + movablemem_map.map[pos].start_pfn); + movablemem_map.map[pos].end_pfn = max(end_pfn, + movablemem_map.map[overlap].end_pfn); + + if (pos != overlap && overlap + 1 != movablemem_map.nr_map) + memmove(&movablemem_map.map[pos+1], + &movablemem_map.map[overlap+1], + sizeof(struct movablemem_entry) * + (movablemem_map.nr_map - overlap - 1)); + + movablemem_map.nr_map -= overlap - pos; +} + +/** + * movablemem_map_add_region - Add a memory range into movablemem_map. + * @start: physical start address of range + * @end: physical end address of range + * + * This function transform the physical address into pfn, and then add the + * range into movablemem_map by calling insert_movablemem_map(). + */ +static void __init movablemem_map_add_region(u64 start, u64 size) +{ + unsigned long start_pfn, end_pfn; + + /* In case size == 0 or start + size overflows */ + if (start + size <= start) + return; + + if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) { + pr_err("movablemem_map: too many entries;" + " ignoring [mem %#010llx-%#010llx]\n", + (unsigned long long) start, + (unsigned long long) (start + size - 1)); + return; + } + + start_pfn = PFN_DOWN(start); + end_pfn = PFN_UP(start + size); + insert_movablemem_map(start_pfn, end_pfn); +} + +/* + * cmdline_parse_movablemem_map - Parse boot option movablemem_map. + * @p: The boot option of the following format: + * movablemem_map=nn[KMG]@ss[KMG] + * + * This option sets the memory range [ss, ss+nn) to be used as movable memory. + * + * Return: 0 on success or -EINVAL on failure. + */ +static int __init cmdline_parse_movablemem_map(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + goto err; + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + goto err; + + if (*p == '@') { + oldp = ++p; + start_at = memparse(p, &p); + if (p == oldp || *p != '\0') + goto err; + + movablemem_map_add_region(start_at, mem_size); + return 0; + } +err: + return -EINVAL; +} +early_param("movablemem_map", cmdline_parse_movablemem_map); + #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ /** -- cgit v1.2.3 From fb06bc8e5f42f38c011de0e59481f464a82380f6 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:42 -0800 Subject: page_alloc: bootmem limit with movablecore_map Ensure the bootmem will not allocate memory from areas that may be ZONE_MOVABLE. The map info is from movablecore_map boot option. Signed-off-by: Tang Chen Reviewed-by: Wen Congyang Reviewed-by: Lai Jiangshan Tested-by: Lin Feng Cc: Wu Jianguo Cc: Mel Gorman Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + mm/memblock.c | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f388203db7e8..dfefaf111c0e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,6 +42,7 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; +extern struct movablemem_map movablemem_map; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) diff --git a/mm/memblock.c b/mm/memblock.c index b8d9147e5c08..c83ff97f57f4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -101,6 +101,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, { phys_addr_t this_start, this_end, cand; u64 i; + int curr = movablemem_map.nr_map - 1; /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE) @@ -114,13 +115,28 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, this_start = clamp(this_start, start, end); this_end = clamp(this_end, start, end); - if (this_end < size) +restart: + if (this_end <= this_start || this_end < size) continue; + for (; curr >= 0; curr--) { + if ((movablemem_map.map[curr].start_pfn << PAGE_SHIFT) + < this_end) + break; + } + cand = round_down(this_end - size, align); + if (curr >= 0 && + cand < movablemem_map.map[curr].end_pfn << PAGE_SHIFT) { + this_end = movablemem_map.map[curr].start_pfn + << PAGE_SHIFT; + goto restart; + } + if (cand >= this_start) return cand; } + return 0; } -- cgit v1.2.3 From e8d1955258091e4c92d5a975ebd7fd8a98f5d30f Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:44 -0800 Subject: acpi, memory-hotplug: parse SRAT before memblock is ready On linux, the pages used by kernel could not be migrated. As a result, if a memory range is used by kernel, it cannot be hot-removed. So if we want to hot-remove memory, we should prevent kernel from using it. The way now used to prevent this is specify a memory range by movablemem_map boot option and set it as ZONE_MOVABLE. But when the system is booting, memblock will allocate memory, and reserve the memory for kernel. And before we parse SRAT, and know the node memory ranges, memblock is working. And it may allocate memory in ranges to be set as ZONE_MOVABLE. This memory can be used by kernel, and never be freed. So, let's parse SRAT before memblock is called first. And it is early enough. The first call of memblock_find_in_range_node() is in: setup_arch() |-->setup_real_mode() so, this patch add a function early_parse_srat() to parse SRAT, and call it before setup_real_mode() is called. NOTE: 1) early_parse_srat() is called before numa_init(), and has initialized numa_meminfo. So DO NOT clear numa_nodes_parsed in numa_init() and DO NOT zero numa_meminfo in numa_init(), otherwise we will lose memory numa info. 2) I don't know why using count of memory affinities parsed from SRAT as a return value in original acpi_numa_init(). So I add a static variable srat_mem_cnt to remember this count and use it as the return value of the new acpi_numa_init() [mhocko@suse.cz: parse SRAT before memblock is ready fix] Signed-off-by: Tang Chen Reviewed-by: Wen Congyang Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 13 +++++++++---- arch/x86/mm/numa.c | 6 ++++-- drivers/acpi/numa.c | 23 +++++++++++++---------- include/linux/acpi.h | 8 ++++++++ 4 files changed, 34 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 915f5efefcf5..9c857f05cef0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif + /* + * In the memory hotplug case, the kernel needs info from SRAT to + * determine which memory is hotpluggable before allocating memory + * using memblock. + */ + acpi_boot_table_init(); + early_acpi_boot_init(); + early_parse_srat(); + #ifdef CONFIG_X86_32 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped< Date: Fri, 22 Feb 2013 16:33:46 -0800 Subject: acpi, memory-hotplug: extend movablemem_map ranges to the end of node When implementing movablemem_map boot option, we introduced an array movablemem_map.map[] to store the memory ranges to be set as ZONE_MOVABLE. Since ZONE_MOVABLE is the latst zone of a node, if user didn't specify the whole node memory range, we need to extend it to the node end so that we can use it to prevent memblock from allocating memory in the ranges user didn't specify. We now implement movablemem_map boot option like this: /* * For movablemem_map=nn[KMG]@ss[KMG]: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 * user specified: |__| |___| * movablemem_map: |___| |_________| |______| ...... * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. * * NOTE: In this case, SRAT info will be ingored. */ [akpm@linux-foundation.org: clean up code, fix build warning] Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/srat.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/mm.h | 5 +++++ mm/page_alloc.c | 34 +++++++++++++++++++++++++++-- 3 files changed, 98 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index cdd0da9dd530..3e90039e52e0 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -141,11 +141,65 @@ static inline int save_add_info(void) {return 1;} static inline int save_add_info(void) {return 0;} #endif +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +static void __init handle_movablemem(int node, u64 start, u64 end) +{ + int overlap; + unsigned long start_pfn, end_pfn; + + start_pfn = PFN_DOWN(start); + end_pfn = PFN_UP(end); + + /* + * For movablecore_map=nn[KMG]@ss[KMG]: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * user specified: |__| |___| + * movablemem_map: |___| |_________| |______| ...... + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + */ + overlap = movablemem_map_overlap(start_pfn, end_pfn); + if (overlap >= 0) { + /* + * If part of this range is in movablemem_map, we need to + * add the range after it to extend the range to the end + * of the node, because from the min address specified to + * the end of the node will be ZONE_MOVABLE. + */ + start_pfn = max(start_pfn, + movablemem_map.map[overlap].start_pfn); + insert_movablemem_map(start_pfn, end_pfn); + + /* + * Set the nodemask, so that if the address range on one node + * is not continuse, we can add the subsequent ranges on the + * same node into movablemem_map. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + } else { + if (node_isset(node, movablemem_map.numa_nodes_hotplug)) + /* + * Insert the range if we already have movable ranges + * on the same node. + */ + insert_movablemem_map(start_pfn, end_pfn); + } +} +#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +static inline void handle_movablemem(int node, u64 start, u64 end) +{ +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; + u32 hotpluggable; int node, pxm; if (srat_disabled()) @@ -154,7 +208,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) goto out_err; - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !save_add_info()) goto out_err; start = ma->base_address; @@ -174,9 +229,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", node, pxm, - (unsigned long long) start, (unsigned long long) end - 1); + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? "Hot Pluggable": ""); + + handle_movablemem(node, start, end); return 0; out_err_bad_srat: diff --git a/include/linux/mm.h b/include/linux/mm.h index ce9bd3049836..4d7377a1d084 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1368,8 +1368,13 @@ struct movablemem_entry { struct movablemem_map { int nr_map; struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; + nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ }; +extern void __init insert_movablemem_map(unsigned long start_pfn, + unsigned long end_pfn); +extern int __init movablemem_map_overlap(unsigned long start_pfn, + unsigned long end_pfn); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 88b9962c99b3..7ea9a003ad57 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5175,6 +5175,36 @@ static int __init cmdline_parse_movablecore(char *p) early_param("kernelcore", cmdline_parse_kernelcore); early_param("movablecore", cmdline_parse_movablecore); +/** + * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[]. + * @start_pfn: start pfn of the range to be checked + * @end_pfn: end pfn of the range to be checked (exclusive) + * + * This function checks if a given memory range [start_pfn, end_pfn) overlaps + * the movablemem_map.map[] array. + * + * Return: index of the first overlapped element in movablemem_map.map[] + * or -1 if they don't overlap each other. + */ +int __init movablemem_map_overlap(unsigned long start_pfn, + unsigned long end_pfn) +{ + int overlap; + + if (!movablemem_map.nr_map) + return -1; + + for (overlap = 0; overlap < movablemem_map.nr_map; overlap++) + if (start_pfn < movablemem_map.map[overlap].end_pfn) + break; + + if (overlap == movablemem_map.nr_map || + end_pfn <= movablemem_map.map[overlap].start_pfn) + return -1; + + return overlap; +} + /** * insert_movablemem_map - Insert a memory range in to movablemem_map.map. * @start_pfn: start pfn of the range @@ -5183,8 +5213,8 @@ early_param("movablecore", cmdline_parse_movablecore); * This function will also merge the overlapped ranges, and sort the array * by start_pfn in monotonic increasing order. */ -static void __init insert_movablemem_map(unsigned long start_pfn, - unsigned long end_pfn) +void __init insert_movablemem_map(unsigned long start_pfn, + unsigned long end_pfn) { int pos, overlap; -- cgit v1.2.3 From 01a178a94e8eaec351b29ee49fbb3d1c124cb7fb Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:49 -0800 Subject: acpi, memory-hotplug: support getting hotplug info from SRAT We now provide an option for users who don't want to specify physical memory address in kernel commandline. /* * For movablemem_map=acpi: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 * hotpluggable: n y y n * movablemem_map: |_____| |_________| * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. */ So user just specify movablemem_map=acpi, and the kernel will use hotpluggable info in SRAT to determine which memory ranges should be set as ZONE_MOVABLE. If all the memory ranges in SRAT is hotpluggable, then no memory can be used by kernel. But before parsing SRAT, memblock has already reserve some memory ranges for other purposes, such as for kernel image, and so on. We cannot prevent kernel from using these memory. So we need to exclude these ranges even if these memory is hotpluggable. Furthermore, there could be several memory ranges in the single node which the kernel resides in. We may skip one range that have memory reserved by memblock, but if the rest of memory is too small, then the kernel will fail to boot. So, make the whole node which the kernel resides in un-hotpluggable. Then the kernel has enough memory to use. NOTE: Using this way will cause NUMA performance down because the whole node will be set as ZONE_MOVABLE, and kernel cannot use memory on it. If users don't want to lose NUMA performance, just don't use it. [akpm@linux-foundation.org: fix warning] [akpm@linux-foundation.org: use strcmp()] Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 29 ++++++++++++--- arch/x86/mm/srat.c | 71 ++++++++++++++++++++++++++++++++++--- include/linux/mm.h | 2 ++ mm/page_alloc.c | 22 +++++++++++- 4 files changed, 113 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 722a74161246..766087781ecd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1640,15 +1640,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that the amount of memory usable for all allocations is not too small. + movablemem_map=acpi + [KNL,X86,IA-64,PPC] This parameter is similar to + memmap except it specifies the memory map of + ZONE_MOVABLE. + This option inform the kernel to use Hot Pluggable bit + in flags from SRAT from ACPI BIOS to determine which + memory devices could be hotplugged. The corresponding + memory ranges will be set as ZONE_MOVABLE. + NOTE: Whatever node the kernel resides in will always + be un-hotpluggable. + movablemem_map=nn[KMG]@ss[KMG] [KNL,X86,IA-64,PPC] This parameter is similar to memmap except it specifies the memory map of ZONE_MOVABLE. - If more areas are all within one node, then from - lowest ss to the end of the node will be ZONE_MOVABLE. - If an area covers two or more nodes, the area from - ss to the end of the 1st node will be ZONE_MOVABLE, - and all the rest nodes will only have ZONE_MOVABLE. + If user specifies memory ranges, the info in SRAT will + be ingored. And it works like the following: + - If more ranges are all within one node, then from + lowest ss to the end of the node will be ZONE_MOVABLE. + - If a range is within a node, then from ss to the end + of the node will be ZONE_MOVABLE. + - If a range covers two or more nodes, then from ss to + the end of the 1st node will be ZONE_MOVABLE, and all + the rest nodes will only have ZONE_MOVABLE. If memmap is specified at the same time, the movablemem_map will be limited within the memmap areas. If kernelcore or movablecore is also specified, @@ -1656,6 +1671,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. satisfied. So the administrator should be careful that the amount of movablemem_map areas are not too large. Otherwise kernel won't have enough memory to start. + NOTE: We don't stop users specifying the node the + kernel resides in as hotpluggable so that this + option can be used as a workaround of firmware + bugs. MTD_Partition= [MTD] Format: ,,, diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 3e90039e52e0..79836d01f789 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -142,16 +142,72 @@ static inline int save_add_info(void) {return 0;} #endif #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -static void __init handle_movablemem(int node, u64 start, u64 end) +static void __init +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { - int overlap; + int overlap, i; unsigned long start_pfn, end_pfn; start_pfn = PFN_DOWN(start); end_pfn = PFN_UP(end); /* - * For movablecore_map=nn[KMG]@ss[KMG]: + * For movablemem_map=acpi: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * hotpluggable: n y y n + * movablemem_map: |_____| |_________| + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + * + * Before parsing SRAT, memblock has already reserve some memory ranges + * for other purposes, such as for kernel image. We cannot prevent + * kernel from using these memory, so we need to exclude these memory + * even if it is hotpluggable. + * Furthermore, to ensure the kernel has enough memory to boot, we make + * all the memory on the node which the kernel resides in + * un-hotpluggable. + */ + if (hotpluggable && movablemem_map.acpi) { + /* Exclude ranges reserved by memblock. */ + struct memblock_type *rgn = &memblock.reserved; + + for (i = 0; i < rgn->cnt; i++) { + if (end <= rgn->regions[i].base || + start >= rgn->regions[i].base + + rgn->regions[i].size) + continue; + + /* + * If the memory range overlaps the memory reserved by + * memblock, then the kernel resides in this node. + */ + node_set(node, movablemem_map.numa_nodes_kernel); + + goto out; + } + + /* + * If the kernel resides in this node, then the whole node + * should not be hotpluggable. + */ + if (node_isset(node, movablemem_map.numa_nodes_kernel)) + goto out; + + insert_movablemem_map(start_pfn, end_pfn); + + /* + * numa_nodes_hotplug nodemask represents which nodes are put + * into movablemem_map.map[]. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + goto out; + } + + /* + * For movablemem_map=nn[KMG]@ss[KMG]: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 @@ -160,6 +216,8 @@ static void __init handle_movablemem(int node, u64 start, u64 end) * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. + * + * NOTE: In this case, SRAT info will be ingored. */ overlap = movablemem_map_overlap(start_pfn, end_pfn); if (overlap >= 0) { @@ -187,9 +245,12 @@ static void __init handle_movablemem(int node, u64 start, u64 end) */ insert_movablemem_map(start_pfn, end_pfn); } +out: + return; } #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void handle_movablemem(int node, u64 start, u64 end) +static inline void +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ @@ -234,7 +295,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) (unsigned long long) start, (unsigned long long) end - 1, hotpluggable ? "Hot Pluggable": ""); - handle_movablemem(node, start, end); + handle_movablemem(node, start, end, hotpluggable); return 0; out_err_bad_srat: diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d7377a1d084..72a42c0fa633 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1366,9 +1366,11 @@ struct movablemem_entry { }; struct movablemem_map { + bool acpi; /* true if using SRAT info */ int nr_map; struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ + nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ }; extern void __init insert_movablemem_map(unsigned long start_pfn, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7ea9a003ad57..a7381be21320 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -203,7 +203,10 @@ static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* Movable memory ranges, will also be used by memblock subsystem. */ -struct movablemem_map movablemem_map; +struct movablemem_map movablemem_map = { + .acpi = false, + .nr_map = 0, +}; static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; @@ -5314,6 +5317,23 @@ static int __init cmdline_parse_movablemem_map(char *p) if (!p) goto err; + if (!strcmp(p, "acpi")) + movablemem_map.acpi = true; + + /* + * If user decide to use info from BIOS, all the other user specified + * ranges will be ingored. + */ + if (movablemem_map.acpi) { + if (movablemem_map.nr_map) { + memset(movablemem_map.map, 0, + sizeof(struct movablemem_entry) + * movablemem_map.nr_map); + movablemem_map.nr_map = 0; + } + return 0; + } + oldp = p; mem_size = memparse(p, &p); if (p == oldp) -- cgit v1.2.3 From f7210e6c4ac795694106c1c5307134d3fc233e88 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:51 -0800 Subject: mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect movablecore_map in memblock_overlaps_region(). The definition of struct movablecore_map is protected by CONFIG_HAVE_MEMBLOCK_NODE_MAP but its use in memblock_overlaps_region() is not. So add CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect the use of movablecore_map in memblock_overlaps_region(). Signed-off-by: Tang Chen Reported-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + mm/memblock.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index dfefaf111c0e..3e5ecb2d790e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); diff --git a/mm/memblock.c b/mm/memblock.c index c83ff97f57f4..1bcd9b970564 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -92,9 +92,13 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type, * * Find @size free area aligned to @align in the specified range and node. * + * If we have CONFIG_HAVE_MEMBLOCK_NODE_MAP defined, we need to check if the + * memory we found if not in hotpluggable ranges. + * * RETURNS: * Found address on success, %0 on failure. */ +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid) @@ -139,6 +143,36 @@ restart: return 0; } +#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, + phys_addr_t end, phys_addr_t size, + phys_addr_t align, int nid) +{ + phys_addr_t this_start, this_end, cand; + u64 i; + + /* pump up @end */ + if (end == MEMBLOCK_ALLOC_ACCESSIBLE) + end = memblock.current_limit; + + /* avoid allocating the first page */ + start = max_t(phys_addr_t, start, PAGE_SIZE); + end = max(start, end); + + for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); + + if (this_end < size) + continue; + + cand = round_down(this_end - size, align); + if (cand >= this_start) + return cand; + } + return 0; +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ /** * memblock_find_in_range - find free area in given range -- cgit v1.2.3 From 194159fbcc0d6ac1351837d3cd7a27a4af0219a6 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 22 Feb 2013 16:33:58 -0800 Subject: mm: remove MIGRATE_ISOLATE check in hotpath Several functions test MIGRATE_ISOLATE and some of those are hotpath but MIGRATE_ISOLATE is used only if we enable CONFIG_MEMORY_ISOLATION(ie, CMA, memory-hotplug and memory-failure) which are not common config option. So let's not add unnecessary overhead and code when we don't enable CONFIG_MEMORY_ISOLATION. Signed-off-by: Minchan Kim Cc: KOSAKI Motohiro Acked-by: Michal Nazarewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ include/linux/page-isolation.h | 19 +++++++++++++++++++ mm/compaction.c | 6 +++++- mm/page_alloc.c | 16 ++++++++++------ mm/vmstat.c | 2 ++ 5 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 73b64a38b984..4f4c8c26fa9d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -57,7 +57,9 @@ enum { */ MIGRATE_CMA, #endif +#ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ +#endif MIGRATE_TYPES }; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index a92061e08d48..3fff8e774067 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -1,6 +1,25 @@ #ifndef __LINUX_PAGEISOLATION_H #define __LINUX_PAGEISOLATION_H +#ifdef CONFIG_MEMORY_ISOLATION +static inline bool is_migrate_isolate_page(struct page *page) +{ + return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; +} +static inline bool is_migrate_isolate(int migratetype) +{ + return migratetype == MIGRATE_ISOLATE; +} +#else +static inline bool is_migrate_isolate_page(struct page *page) +{ + return false; +} +static inline bool is_migrate_isolate(int migratetype) +{ + return false; +} +#endif bool has_unmovable_pages(struct zone *zone, struct page *page, int count, bool skip_hwpoisoned_pages); diff --git a/mm/compaction.c b/mm/compaction.c index 5d5b0b259bc9..ef53f352b606 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "internal.h" #ifdef CONFIG_COMPACTION @@ -215,7 +216,10 @@ static bool suitable_migration_target(struct page *page) int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + if (migratetype == MIGRATE_RESERVE) + return false; + + if (is_migrate_isolate(migratetype)) return false; /* If the page is a large free page, then allow migration */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07fe78d01ffd..e3fb290194c0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -673,7 +673,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); - if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) { + if (likely(!is_migrate_isolate_page(page))) { __mod_zone_page_state(zone, NR_FREE_PAGES, 1); if (is_migrate_cma(mt)) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); @@ -691,7 +691,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order, zone->pages_scanned = 0; __free_one_page(page, zone, order, migratetype); - if (unlikely(migratetype != MIGRATE_ISOLATE)) + if (unlikely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); spin_unlock(&zone->lock); } @@ -923,7 +923,9 @@ static int fallbacks[MIGRATE_TYPES][4] = { [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, #endif [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ +#ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ +#endif }; /* @@ -1149,7 +1151,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add_tail(&page->lru, list); if (IS_ENABLED(CONFIG_CMA)) { mt = get_pageblock_migratetype(page); - if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + if (!is_migrate_cma(mt) && !is_migrate_isolate(mt)) mt = migratetype; } set_freepage_migratetype(page, mt); @@ -1333,7 +1335,7 @@ void free_hot_cold_page(struct page *page, int cold) * excessively into the page allocator */ if (migratetype >= MIGRATE_PCPTYPES) { - if (unlikely(migratetype == MIGRATE_ISOLATE)) { + if (unlikely(is_migrate_isolate(migratetype))) { free_one_page(zone, page, 0, migratetype); goto out; } @@ -1407,7 +1409,7 @@ static int __isolate_free_page(struct page *page, unsigned int order) zone = page_zone(page); mt = get_pageblock_migratetype(page); - if (mt != MIGRATE_ISOLATE) { + if (!is_migrate_isolate(mt)) { /* Obey watermarks as if the page was being allocated */ watermark = low_wmark_pages(zone) + (1 << order); if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) @@ -1426,7 +1428,7 @@ static int __isolate_free_page(struct page *page, unsigned int order) struct page *endpage = page + (1 << order) - 1; for (; page < endpage; page += pageblock_nr_pages) { int mt = get_pageblock_migratetype(page); - if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) + if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)) set_pageblock_migratetype(page, MIGRATE_MOVABLE); } @@ -2904,7 +2906,9 @@ static void show_migration_types(unsigned char type) #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif +#ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = 'I', +#endif }; char tmp[MIGRATE_TYPES + 1]; char *p = tmp; diff --git a/mm/vmstat.c b/mm/vmstat.c index e3475f5fd983..c9d1f68120cd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -628,7 +628,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { #ifdef CONFIG_CMA "CMA", #endif +#ifdef CONFIG_MEMORY_ISOLATION "Isolate", +#endif }; static void *frag_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From 293c07e31ab5a0b8df8c19b2a9e5c6fa30308849 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Fri, 22 Feb 2013 16:34:02 -0800 Subject: memory-failure: use num_poisoned_pages instead of mce_bad_pages Since MCE is an x86 concept, and this code is in mm/, it would be better to use the name num_poisoned_pages instead of mce_bad_pages. [akpm@linux-foundation.org: fix mm/sparse.c] Signed-off-by: Xishi Qiu Signed-off-by: Jiang Liu Suggested-by: Borislav Petkov Reviewed-by: Wanpeng Li Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- include/linux/mm.h | 2 +- mm/memory-failure.c | 16 ++++++++-------- mm/sparse.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 80e4645f7990..c3dac611c3c0 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) vmi.used >> 10, vmi.largest_chunk >> 10 #ifdef CONFIG_MEMORY_FAILURE - ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) + ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * diff --git a/include/linux/mm.h b/include/linux/mm.h index 72a42c0fa633..a114b8eb7676 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1753,7 +1753,7 @@ extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); -extern atomic_long_t mce_bad_pages; +extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); extern void dump_page(struct page *page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f4c9fa1149e2..c95e19af510b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0; int sysctl_memory_failure_recovery __read_mostly = 1; -atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); +atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) @@ -1040,7 +1040,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } nr_pages = 1 << compound_trans_order(hpage); - atomic_long_add(nr_pages, &mce_bad_pages); + atomic_long_add(nr_pages, &num_poisoned_pages); /* * We need/can do nothing about count=0 pages. @@ -1070,7 +1070,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) if (!PageHWPoison(hpage) || (hwpoison_filter(p) && TestClearPageHWPoison(p)) || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); return 0; } set_page_hwpoison_huge_page(hpage); @@ -1128,7 +1128,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); unlock_page(hpage); put_page(hpage); return 0; @@ -1323,7 +1323,7 @@ int unpoison_memory(unsigned long pfn) return 0; } if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); return 0; } @@ -1337,7 +1337,7 @@ int unpoison_memory(unsigned long pfn) */ if (TestClearPageHWPoison(page)) { pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); freeit = 1; if (PageHuge(page)) clear_page_hwpoison_huge_page(page); @@ -1442,7 +1442,7 @@ static int soft_offline_huge_page(struct page *page, int flags) } done: /* keep elevated page count for bad page */ - atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages); + atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages); set_page_hwpoison_huge_page(hpage); dequeue_hwpoisoned_huge_page(hpage); out: @@ -1585,7 +1585,7 @@ int soft_offline_page(struct page *page, int flags) done: /* keep elevated page count for bad page */ - atomic_long_inc(&mce_bad_pages); + atomic_long_inc(&num_poisoned_pages); SetPageHWPoison(page); out: return ret; diff --git a/mm/sparse.c b/mm/sparse.c index cff97960f1d7..7ca6dc847947 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -783,7 +783,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) for (i = 0; i < PAGES_PER_SECTION; i++) { if (PageHWPoison(&memmap[i])) { - atomic_long_sub(1, &mce_bad_pages); + atomic_long_sub(1, &num_poisoned_pages); ClearPageHWPoison(&memmap[i]); } } -- cgit v1.2.3 From 258401a60c4df39332f30ef57afbc6dbf29a7e84 Mon Sep 17 00:00:00 2001 From: Zlatko Calusic Date: Fri, 22 Feb 2013 16:34:06 -0800 Subject: mm: don't wait on congested zones in balance_pgdat() From: Zlatko Calusic Commit 92df3a723f84 ("mm: vmscan: throttle reclaim if encountering too many dirty pages under writeback") introduced waiting on congested zones based on a sane algorithm in shrink_inactive_list(). What this means is that there's no more need for throttling and additional heuristics in balance_pgdat(). So, let's remove it and tidy up the code. Signed-off-by: Zlatko Calusic Cc: Mel Gorman Cc: Minchan Kim Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 1 - mm/vmscan.c | 29 +---------------------------- mm/vmstat.c | 1 - 3 files changed, 1 insertion(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index fce0a2799d43..bd6cf61142be 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -36,7 +36,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, - KSWAPD_SKIP_CONGESTION_WAIT, PAGEOUTRUN, ALLOCSTALL, PGROTATED, #ifdef CONFIG_NUMA_BALANCING NUMA_PTE_UPDATES, diff --git a/mm/vmscan.c b/mm/vmscan.c index 8fde2fc223d9..b93968b71dc6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2617,7 +2617,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int *classzone_idx) { bool pgdat_is_balanced = false; - struct zone *unbalanced_zone; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; @@ -2648,9 +2647,6 @@ loop_again: do { unsigned long lru_pages = 0; - int has_under_min_watermark_zone = 0; - - unbalanced_zone = NULL; /* * Scan in the highmem->dma direction for the highest @@ -2790,17 +2786,7 @@ loop_again: continue; } - if (!zone_balanced(zone, testorder, 0, end_zone)) { - unbalanced_zone = zone; - /* - * We are still under min water mark. This - * means that we have a GFP_ATOMIC allocation - * failure risk. Hurry up! - */ - if (!zone_watermark_ok_safe(zone, order, - min_wmark_pages(zone), end_zone, 0)) - has_under_min_watermark_zone = 1; - } else { + if (zone_balanced(zone, testorder, 0, end_zone)) /* * If a zone reaches its high watermark, * consider it to be no longer congested. It's @@ -2809,8 +2795,6 @@ loop_again: * speculatively avoid congestion waits */ zone_clear_flag(zone, ZONE_CONGESTED); - } - } /* @@ -2827,17 +2811,6 @@ loop_again: break; /* kswapd: all done */ } - /* - * OK, kswapd is getting into trouble. Take a nap, then take - * another pass across the zones. - */ - if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { - if (has_under_min_watermark_zone) - count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); - else if (unbalanced_zone) - wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10); - } - /* * We do this so kswapd doesn't build up large priorities for * example when it is freeing in parallel with allocators. It diff --git a/mm/vmstat.c b/mm/vmstat.c index c9d1f68120cd..57f02fd1768b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -770,7 +770,6 @@ const char * const vmstat_text[] = { "kswapd_inodesteal", "kswapd_low_wmark_hit_quickly", "kswapd_high_wmark_hit_quickly", - "kswapd_skip_congestion_wait", "pageoutrun", "allocstall", -- cgit v1.2.3 From 21caf2fc1931b485483ddd254b634fa8f0099963 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Feb 2013 16:34:08 -0800 Subject: mm: teach mm by current context info to not do I/O during memory allocation This patch introduces PF_MEMALLOC_NOIO on process flag('flags' field of 'struct task_struct'), so that the flag can be set by one task to avoid doing I/O inside memory allocation in the task's context. The patch trys to solve one deadlock problem caused by block device, and the problem may happen at least in the below situations: - during block device runtime resume, if memory allocation with GFP_KERNEL is called inside runtime resume callback of any one of its ancestors(or the block device itself), the deadlock may be triggered inside the memory allocation since it might not complete until the block device becomes active and the involed page I/O finishes. The situation is pointed out first by Alan Stern. It is not a good approach to convert all GFP_KERNEL[1] in the path into GFP_NOIO because several subsystems may be involved(for example, PCI, USB and SCSI may be involved for usb mass stoarage device, network devices involved too in the iSCSI case) - during block device runtime suspend, because runtime resume need to wait for completion of concurrent runtime suspend. - during error handling of usb mass storage deivce, USB bus reset will be put on the device, so there shouldn't have any memory allocation with GFP_KERNEL during USB bus reset, otherwise the deadlock similar with above may be triggered. Unfortunately, any usb device may include one mass storage interface in theory, so it requires all usb interface drivers to handle the situation. In fact, most usb drivers don't know how to handle bus reset on the device and don't provide .pre_set() and .post_reset() callback at all, so USB core has to unbind and bind driver for these devices. So it is still not practical to resort to GFP_NOIO for solving the problem. Also the introduced solution can be used by block subsystem or block drivers too, for example, set the PF_MEMALLOC_NOIO flag before doing actual I/O transfer. It is not a good idea to convert all these GFP_KERNEL in the affected path into GFP_NOIO because these functions doing that may be implemented as library and will be called in many other contexts. In fact, memalloc_noio_flags() can convert some of current static GFP_NOIO allocation into GFP_KERNEL back in other non-affected contexts, at least almost all GFP_NOIO in USB subsystem can be converted into GFP_KERNEL after applying the approach and make allocation with GFP_NOIO only happen in runtime resume/bus reset/block I/O transfer contexts generally. [1], several GFP_KERNEL allocation examples in runtime resume path - pci subsystem acpi_os_allocate <-acpi_ut_allocate <-ACPI_ALLOCATE_ZEROED <-acpi_evaluate_object <-__acpi_bus_set_power <-acpi_bus_set_power <-acpi_pci_set_power_state <-platform_pci_set_power_state <-pci_platform_power_transition <-__pci_complete_power_transition <-pci_set_power_state <-pci_restore_standard_config <-pci_pm_runtime_resume - usb subsystem usb_get_status <-finish_port_resume <-usb_port_resume <-generic_resume <-usb_resume_device <-usb_resume_both <-usb_runtime_resume - some individual usb drivers usblp, uvc, gspca, most of dvb-usb-v2 media drivers, cpia2, az6007, .... That is just what I have found. Unfortunately, this allocation can only be found by human being now, and there should be many not found since any function in the resume path(call tree) may allocate memory with GFP_KERNEL. Signed-off-by: Ming Lei Signed-off-by: Minchan Kim Cc: Alan Stern Cc: Oliver Neukum Cc: Jiri Kosina Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Ingo Molnar Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Greg KH Cc: Jens Axboe Cc: "David S. Miller" Cc: Eric Dumazet Cc: David Decotigny Cc: Tom Herbert Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 ++++++++++++++++++++++ mm/page_alloc.c | 9 ++++++++- mm/vmscan.c | 4 ++-- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e4112aad2964..c2182b53dace 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -51,6 +51,7 @@ struct sched_param { #include #include #include +#include #include @@ -1791,6 +1792,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ @@ -1828,6 +1830,26 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~__GFP_IO; + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + /* * task->jobctl flags */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e3fb290194c0..3ede25e6686e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2624,10 +2624,17 @@ retry_cpuset: page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, preferred_zone, migratetype); - if (unlikely(!page)) + if (unlikely(!page)) { + /* + * Runtime PM, block IO and its error handling path + * can deadlock because I/O on the device might not + * complete. + */ + gfp_mask = memalloc_noio_flags(gfp_mask); page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); + } trace_mm_page_alloc(page, order, gfp_mask, migratetype); diff --git a/mm/vmscan.c b/mm/vmscan.c index b93968b71dc6..a68fa20269d9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2352,7 +2352,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, { unsigned long nr_reclaimed; struct scan_control sc = { - .gfp_mask = gfp_mask, + .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), .may_writepage = !laptop_mode, .nr_to_reclaim = SWAP_CLUSTER_MAX, .may_unmap = 1, @@ -3313,7 +3313,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), .may_swap = 1, .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), - .gfp_mask = gfp_mask, + .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), .order = order, .priority = ZONE_RECLAIM_PRIORITY, }; -- cgit v1.2.3 From e823407f7b11fa06ba8e7a2801eb9ed11268a7ec Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Feb 2013 16:34:11 -0800 Subject: pm / runtime: introduce pm_runtime_set_memalloc_noio() Introduce the flag memalloc_noio in 'struct dev_pm_info' to help PM core to teach mm not allocating memory with GFP_KERNEL flag for avoiding probable deadlock. As explained in the comment, any GFP_KERNEL allocation inside runtime_resume() or runtime_suspend() on any one of device in the path from one block or network device to the root device in the device tree may cause deadlock, the introduced pm_runtime_set_memalloc_noio() sets or clears the flag on device in the path recursively. Signed-off-by: Ming Lei Cc: Minchan Kim Cc: Alan Stern Cc: Oliver Neukum Cc: Jiri Kosina Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Ingo Molnar Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Greg KH Cc: Jens Axboe Cc: "David S. Miller" Cc: Eric Dumazet Cc: David Decotigny Cc: Tom Herbert Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/power/runtime.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 1 + include/linux/pm_runtime.h | 3 ++ 3 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 3148b10dc2e5..cd92e1c77cb7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -124,6 +124,76 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); +static int dev_memalloc_noio(struct device *dev, void *data) +{ + return dev->power.memalloc_noio; +} + +/* + * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. + * @dev: Device to handle. + * @enable: True for setting the flag and False for clearing the flag. + * + * Set the flag for all devices in the path from the device to the + * root device in the device tree if @enable is true, otherwise clear + * the flag for devices in the path whose siblings don't set the flag. + * + * The function should only be called by block device, or network + * device driver for solving the deadlock problem during runtime + * resume/suspend: + * + * If memory allocation with GFP_KERNEL is called inside runtime + * resume/suspend callback of any one of its ancestors(or the + * block device itself), the deadlock may be triggered inside the + * memory allocation since it might not complete until the block + * device becomes active and the involed page I/O finishes. The + * situation is pointed out first by Alan Stern. Network device + * are involved in iSCSI kind of situation. + * + * The lock of dev_hotplug_mutex is held in the function for handling + * hotplug race because pm_runtime_set_memalloc_noio() may be called + * in async probe(). + * + * The function should be called between device_add() and device_del() + * on the affected device(block/network device). + */ +void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) +{ + static DEFINE_MUTEX(dev_hotplug_mutex); + + mutex_lock(&dev_hotplug_mutex); + for (;;) { + bool enabled; + + /* hold power lock since bitfield is not SMP-safe. */ + spin_lock_irq(&dev->power.lock); + enabled = dev->power.memalloc_noio; + dev->power.memalloc_noio = enable; + spin_unlock_irq(&dev->power.lock); + + /* + * not need to enable ancestors any more if the device + * has been enabled. + */ + if (enabled && enable) + break; + + dev = dev->parent; + + /* + * clear flag of the parent device only if all the + * children don't set the flag because ancestor's + * flag was set by any one of the descendants. + */ + if (!dev || (!enable && + device_for_each_child(dev, NULL, + dev_memalloc_noio))) + break; + } + mutex_unlock(&dev_hotplug_mutex); +} +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio); + /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. diff --git a/include/linux/pm.h b/include/linux/pm.h index 97bcf23e045a..e5d7230332a4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -537,6 +537,7 @@ struct dev_pm_info { unsigned int irq_safe:1; unsigned int use_autosuspend:1; unsigned int timer_autosuspends:1; + unsigned int memalloc_noio:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index c785c215abfc..7d7e09efff9b 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -47,6 +47,7 @@ extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); +extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); static inline bool pm_children_suspended(struct device *dev) { @@ -156,6 +157,8 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } +static inline void pm_runtime_set_memalloc_noio(struct device *dev, + bool enable){} #endif /* !CONFIG_PM_RUNTIME */ -- cgit v1.2.3 From cf82f3489cf414fe27c6c5af2b372fdcd2a58b24 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 22 Feb 2013 16:34:24 -0800 Subject: mm: remove unused memclear_highpage_flush() Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index ef788b5b4a35..7fb31da45d03 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -219,12 +219,6 @@ static inline void zero_user(struct page *page, zero_user_segments(page, start, start + size, 0, 0); } -static inline void __deprecated memclear_highpage_flush(struct page *page, - unsigned int offset, unsigned int size) -{ - zero_user(page, offset, size); -} - #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, -- cgit v1.2.3 From 34f0315adb58af3b01f59d05b2bce267474e71cb Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 22 Feb 2013 16:34:25 -0800 Subject: mm: numa: fix minor typo in numa_next_scan s/me/be/ and clarify the comment a bit when we're changing it anyway. Signed-off-by: Mel Gorman Suggested-by: Simon Jeons Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Ingo Molnar Cc: Wanpeng Li Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f8f5162a3571..47047cb4d8e5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -414,9 +414,9 @@ struct mm_struct { #endif #ifdef CONFIG_NUMA_BALANCING /* - * numa_next_scan is the next time when the PTEs will me marked - * pte_numa to gather statistics and migrate pages to new nodes - * if necessary + * numa_next_scan is the next time that the PTEs will be marked + * pte_numa. NUMA hinting faults will gather statistics and migrate + * pages to new nodes if necessary. */ unsigned long numa_next_scan; -- cgit v1.2.3 From 3c0ff4689630b280704666833e9539d84cddc373 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 22 Feb 2013 16:34:29 -0800 Subject: mm: numa: handle side-effects in count_vm_numa_events() for !CONFIG_NUMA_BALANCING The current definitions for count_vm_numa_events() is wrong for !CONFIG_NUMA_BALANCING as the following would miss the side-effect. count_vm_numa_events(NUMA_FOO, bar++); There are no such users of count_vm_numa_events() but this patch fixes it as it is a potential pitfall. Ideally both would be converted to static inline but NUMA_PTE_UPDATES is not defined if !CONFIG_NUMA_BALANCING and creating dummy constants just to have a static inline would be similarly clumsy. Signed-off-by: Mel Gorman Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Ingo Molnar Cc: Simon Jeons Cc: Wanpeng Li Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index a13291f7da88..5fd71a7d0dfd 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -85,7 +85,7 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_numa_events(x, y) count_vm_events(x, y) #else #define count_vm_numa_event(x) do {} while (0) -#define count_vm_numa_events(x, y) do {} while (0) +#define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ #define __count_zone_vm_events(item, zone, delta) \ -- cgit v1.2.3 From bbeae5b05ef6e40bf54db05ceb8635824153b9e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Feb 2013 16:34:30 -0800 Subject: mm: move page flags layout to separate header This is a preparation patch for moving page->_last_nid into page->flags that moves page flag layout information to a separate header. This patch is necessary because otherwise there would be a circular dependency between mm_types.h and mm.h. Signed-off-by: Mel Gorman Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Ingo Molnar Cc: Simon Jeons Cc: Wanpeng Li Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 40 ---------------------- include/linux/mm_types.h | 1 + include/linux/mmzone.h | 22 +----------- include/linux/page-flags-layout.h | 71 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 61 deletions(-) create mode 100644 include/linux/page-flags-layout.h (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a114b8eb7676..c2d7d5993b14 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -581,51 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) * sets it, so none of the operations on it need to be atomic. */ - -/* - * page->flags layout: - * - * There are three possibilities for how page->flags get - * laid out. The first is for the normal case, without - * sparsemem. The second is for sparsemem when there is - * plenty of space for node and section. The last is when - * we have run out of space and have to fall back to an - * alternate (slower) way of determining the node. - * - * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | - * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | - * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | - */ -#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -#define SECTIONS_WIDTH SECTIONS_SHIFT -#else -#define SECTIONS_WIDTH 0 -#endif - -#define ZONES_WIDTH ZONES_SHIFT - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS -#define NODES_WIDTH NODES_SHIFT -#else -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#error "Vmemmap: No space for nodes field in page flags" -#endif -#define NODES_WIDTH 0 -#endif - /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) -/* - * We are going to use the flags for the page to node mapping if its in - * there. This includes the case where there is no node, so it is implicit. - */ -#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) -#define NODE_NOT_IN_PAGE_FLAGS -#endif - /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 47047cb4d8e5..d05d632c716e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4f4c8c26fa9d..6c80d0ac14dd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -310,24 +310,6 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H -/* - * When a memory allocation must conform to specific limitations (such - * as being suitable for DMA) the caller will pass in hints to the - * allocator in the gfp_mask, in the zone modifier bits. These bits - * are used to select a priority ordered list of memory zones which - * match the requested limits. See gfp_zone() in include/linux/gfp.h - */ - -#if MAX_NR_ZONES < 2 -#define ZONES_SHIFT 0 -#elif MAX_NR_ZONES <= 2 -#define ZONES_SHIFT 1 -#elif MAX_NR_ZONES <= 4 -#define ZONES_SHIFT 2 -#else -#error ZONES_SHIFT -- too many zones configured adjust calculation -#endif - struct zone { /* Fields commonly accessed by the page allocator */ @@ -1055,8 +1037,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ -#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) - #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h new file mode 100644 index 000000000000..316805d6ba1b --- /dev/null +++ b/include/linux/page-flags-layout.h @@ -0,0 +1,71 @@ +#ifndef PAGE_FLAGS_LAYOUT_H +#define PAGE_FLAGS_LAYOUT_H + +#include +#include + +/* + * When a memory allocation must conform to specific limitations (such + * as being suitable for DMA) the caller will pass in hints to the + * allocator in the gfp_mask, in the zone modifier bits. These bits + * are used to select a priority ordered list of memory zones which + * match the requested limits. See gfp_zone() in include/linux/gfp.h + */ +#if MAX_NR_ZONES < 2 +#define ZONES_SHIFT 0 +#elif MAX_NR_ZONES <= 2 +#define ZONES_SHIFT 1 +#elif MAX_NR_ZONES <= 4 +#define ZONES_SHIFT 2 +#else +#error ZONES_SHIFT -- too many zones configured adjust calculation +#endif + +#ifdef CONFIG_SPARSEMEM +#include + +/* SECTION_SHIFT #bits space required to store a section # */ +#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) + +#endif /* CONFIG_SPARSEMEM */ + +/* + * page->flags layout: + * + * There are three possibilities for how page->flags get + * laid out. The first is for the normal case, without + * sparsemem. The second is for sparsemem when there is + * plenty of space for node and section. The last is when + * we have run out of space and have to fall back to an + * alternate (slower) way of determining the node. + * + * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | + * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | + * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | + */ +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +#define SECTIONS_WIDTH SECTIONS_SHIFT +#else +#define SECTIONS_WIDTH 0 +#endif + +#define ZONES_WIDTH ZONES_SHIFT + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#define NODES_WIDTH NODES_SHIFT +#else +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#error "Vmemmap: No space for nodes field in page flags" +#endif +#define NODES_WIDTH 0 +#endif + +/* + * We are going to use the flags for the page to node mapping if its in + * there. This includes the case where there is no node, so it is implicit. + */ +#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) +#define NODE_NOT_IN_PAGE_FLAGS +#endif + +#endif /* _LINUX_PAGE_FLAGS_LAYOUT */ -- cgit v1.2.3 From 75980e97daccfc6babbac7e180ff118537955f5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Feb 2013 16:34:32 -0800 Subject: mm: fold page->_last_nid into page->flags where possible page->_last_nid fits into page->flags on 64-bit. The unlikely 32-bit NUMA configuration with NUMA Balancing will still need an extra page field. As Peter notes "Completely dropping 32bit support for CONFIG_NUMA_BALANCING would simplify things, but it would also remove the warning if we grow enough 64bit only page-flags to push the last-cpu out." [mgorman@suse.de: minor modifications] Signed-off-by: Mel Gorman Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Ingo Molnar Cc: Simon Jeons Cc: Wanpeng Li Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 33 ++++++++++++++++++++++++++++++++- include/linux/mm_types.h | 2 +- include/linux/page-flags-layout.h | 33 +++++++++++++++++++++++++-------- mm/memory.c | 4 ++++ 4 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c2d7d5993b14..473abbda942e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -581,10 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) * sets it, so none of the operations on it need to be atomic. */ -/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ +/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) +#define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -594,6 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) +#define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -615,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) +#define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) @@ -654,6 +657,7 @@ static inline int page_to_nid(const struct page *page) #endif #ifdef CONFIG_NUMA_BALANCING +#ifdef LAST_NID_NOT_IN_PAGE_FLAGS static inline int page_xchg_last_nid(struct page *page, int nid) { return xchg(&page->_last_nid, nid); @@ -668,6 +672,33 @@ static inline void reset_page_last_nid(struct page *page) page->_last_nid = -1; } #else +static inline int page_last_nid(struct page *page) +{ + return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; +} + +static inline int page_xchg_last_nid(struct page *page, int nid) +{ + unsigned long old_flags, flags; + int last_nid; + + do { + old_flags = flags = page->flags; + last_nid = page_last_nid(page); + + flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); + flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; + } while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags)); + + return last_nid; +} + +static inline void reset_page_last_nid(struct page *page) +{ + page_xchg_last_nid(page, (1 << LAST_NID_SHIFT) - 1); +} +#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ +#else static inline int page_xchg_last_nid(struct page *page, int nid) { return page_to_nid(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d05d632c716e..ace9a5f01c64 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -174,7 +174,7 @@ struct page { void *shadow; #endif -#ifdef CONFIG_NUMA_BALANCING +#ifdef LAST_NID_NOT_IN_PAGE_FLAGS int _last_nid; #endif } diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 316805d6ba1b..93506a114034 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,15 +32,16 @@ /* * page->flags layout: * - * There are three possibilities for how page->flags get - * laid out. The first is for the normal case, without - * sparsemem. The second is for sparsemem when there is - * plenty of space for node and section. The last is when - * we have run out of space and have to fall back to an - * alternate (slower) way of determining the node. + * There are five possibilities for how page->flags get laid out. The first + * pair is for the normal case without sparsemem. The second pair is for + * sparsemem when there is plenty of space for node and section information. + * The last is when there is insufficient space in page->flags and a separate + * lookup is necessary. * - * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | - * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | + * No sparsemem or sparsemem vmemmap: | NODE | ZONE | ... | FLAGS | + * " plus space for last_nid: | NODE | ZONE | LAST_NID ... | FLAGS | + * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS | + * " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS | * classic sparse no space for node: | SECTION | ZONE | ... | FLAGS | */ #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -60,6 +61,18 @@ #define NODES_WIDTH 0 #endif +#ifdef CONFIG_NUMA_BALANCING +#define LAST_NID_SHIFT NODES_SHIFT +#else +#define LAST_NID_SHIFT 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#define LAST_NID_WIDTH LAST_NID_SHIFT +#else +#define LAST_NID_WIDTH 0 +#endif + /* * We are going to use the flags for the page to node mapping if its in * there. This includes the case where there is no node, so it is implicit. @@ -68,4 +81,8 @@ #define NODE_NOT_IN_PAGE_FLAGS #endif +#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0 +#define LAST_NID_NOT_IN_PAGE_FLAGS +#endif + #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/mm/memory.c b/mm/memory.c index 7837ceacf090..054250ee4a68 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -69,6 +69,10 @@ #include "internal.h" +#ifdef LAST_NID_NOT_IN_PAGE_FLAGS +#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid. +#endif + #ifndef CONFIG_NEED_MULTIPLE_NODES /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; -- cgit v1.2.3 From 9800339b5e0f0e24ab3dac349e0de80d2018832e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 22 Feb 2013 16:34:35 -0800 Subject: mm: don't inline page_mapping() According to akpm, this saves 1/2k text and makes things simple for the next patch. Numbers from Minchan: add/remove: 1/0 grow/shrink: 6/22 up/down: 92/-516 (-424) function old new delta page_mapping - 48 +48 do_task_stat 2292 2308 +16 page_remove_rmap 240 248 +8 load_elf_binary 4500 4508 +8 update_queue 532 536 +4 scsi_probe_and_add_lun 2892 2896 +4 lookup_fast 644 648 +4 vcs_read 1040 1036 -4 __ip_route_output_key 1904 1900 -4 ip_route_input_noref 2508 2500 -8 shmem_file_aio_read 784 772 -12 __isolate_lru_page 272 256 -16 shmem_replace_page 708 688 -20 mark_buffer_dirty 228 208 -20 __set_page_dirty_buffers 240 220 -20 __remove_mapping 276 256 -20 update_mmu_cache 500 476 -24 set_page_dirty_balance 92 68 -24 set_page_dirty 172 148 -24 page_evictable 88 64 -24 page_cache_pipe_buf_steal 248 224 -24 clear_page_dirty_for_io 340 316 -24 test_set_page_writeback 400 372 -28 test_clear_page_writeback 516 488 -28 invalidate_inode_page 156 128 -28 page_mkclean 432 400 -32 flush_dcache_page 360 328 -32 __set_page_dirty_nobuffers 324 280 -44 shrink_page_list 2412 2356 -56 Signed-off-by: Shaohua Li Suggested-by: Andrew Morton Cc: Hugh Dickins Acked-by: Rik van Riel Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 13 +------------ mm/util.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 473abbda942e..1d4122bf6f27 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -809,18 +809,7 @@ void page_address_init(void); #define PAGE_MAPPING_KSM 2 #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) -extern struct address_space swapper_space; -static inline struct address_space *page_mapping(struct page *page) -{ - struct address_space *mapping = page->mapping; - - VM_BUG_ON(PageSlab(page)); - if (unlikely(PageSwapCache(page))) - mapping = &swapper_space; - else if ((unsigned long)mapping & PAGE_MAPPING_ANON) - mapping = NULL; - return mapping; -} +extern struct address_space *page_mapping(struct page *page); /* Neutral page->mapping pointer to address_space or anon_vma or other */ static inline void *page_rmapping(struct page *page) diff --git a/mm/util.c b/mm/util.c index 3704bf1bef94..16a73195a37b 100644 --- a/mm/util.c +++ b/mm/util.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "internal.h" @@ -382,6 +383,21 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +struct address_space *page_mapping(struct page *page) +{ + struct address_space *mapping = page->mapping; + + VM_BUG_ON(PageSlab(page)); +#ifdef CONFIG_SWAP + if (unlikely(PageSwapCache(page))) + mapping = &swapper_space; + else +#endif + if ((unsigned long)mapping & PAGE_MAPPING_ANON) + mapping = NULL; + return mapping; +} + /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); -- cgit v1.2.3 From 33806f06da654092182410d974b6d3c5396ea3eb Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 22 Feb 2013 16:34:37 -0800 Subject: swap: make each swap partition have one address_space When I use several fast SSD to do swap, swapper_space.tree_lock is heavily contended. This makes each swap partition have one address_space to reduce the lock contention. There is an array of address_space for swap. The swap entry type is the index to the array. In my test with 3 SSD, this increases the swapout throughput 20%. [akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache] Signed-off-by: Shaohua Li Cc: Hugh Dickins Acked-by: Rik van Riel Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 4 ++-- include/linux/swap.h | 9 +++++---- mm/memcontrol.c | 4 ++-- mm/mincore.c | 5 +++-- mm/swap.c | 9 +++++++-- mm/swap_state.c | 55 ++++++++++++++++++++++++++++++++++++---------------- mm/swapfile.c | 5 +++-- mm/util.c | 10 +++++++--- 8 files changed, 67 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c3dac611c3c0..1efaaa19c4f3 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = global_page_state(NR_FILE_PAGES) - - total_swapcache_pages - i.bufferram; + total_swapcache_pages() - i.bufferram; if (cached < 0) cached = 0; @@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeram), K(i.bufferram), K(cached), - K(total_swapcache_pages), + K(total_swapcache_pages()), K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), K(pages[LRU_ACTIVE_ANON]), diff --git a/include/linux/swap.h b/include/linux/swap.h index 8c66486a8ca8..235c039892ee 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -8,7 +8,7 @@ #include #include #include - +#include #include #include @@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ -extern struct address_space swapper_space; -#define total_swapcache_pages swapper_space.nrpages +extern struct address_space swapper_spaces[]; +#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) +extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); @@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #define nr_swap_pages 0L #define total_swap_pages 0L -#define total_swapcache_pages 0UL +#define total_swapcache_pages() 0UL #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c878b1c69510..f85861531f22 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, * Because lookup_swap_cache() updates some statistics counter, * we call find_get_page() with swapper_space directly. */ - page = find_get_page(&swapper_space, ent.val); + page = find_get_page(swap_address_space(ent), ent.val); if (do_swap_account) entry->val = ent.val; @@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) *entry = swap; - page = find_get_page(&swapper_space, swap.val); + page = find_get_page(swap_address_space(swap), swap.val); } #endif return page; diff --git a/mm/mincore.c b/mm/mincore.c index 936b4cee8cb1..da2be56a7b8f 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) /* shmem/tmpfs may return swap: account for swapcache page too. */ if (radix_tree_exceptional_entry(page)) { swp_entry_t swap = radix_to_swp_entry(page); - page = find_get_page(&swapper_space, swap.val); + page = find_get_page(swap_address_space(swap), swap.val); } #endif if (page) { @@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } else { #ifdef CONFIG_SWAP pgoff = entry.val; - *vec = mincore_page(&swapper_space, pgoff); + *vec = mincore_page(swap_address_space(entry), + pgoff); #else WARN_ON(1); *vec = 1; diff --git a/mm/swap.c b/mm/swap.c index 6310dc2008ff..8a529a01e8fc 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag); void __init swap_setup(void) { unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); - #ifdef CONFIG_SWAP - bdi_init(swapper_space.backing_dev_info); + int i; + + bdi_init(swapper_spaces[0].backing_dev_info); + for (i = 0; i < MAX_SWAPFILES; i++) { + spin_lock_init(&swapper_spaces[i].tree_lock); + INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); + } #endif /* Use a smaller cluster for small-memory machines */ diff --git a/mm/swap_state.c b/mm/swap_state.c index 0cb36fb1f61c..8d6644c5d0cc 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, }; -struct address_space swapper_space = { - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), - .a_ops = &swap_aops, - .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), - .backing_dev_info = &swap_backing_dev_info, +struct address_space swapper_spaces[MAX_SWAPFILES] = { + [0 ... MAX_SWAPFILES - 1] = { + .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), + .a_ops = &swap_aops, + .backing_dev_info = &swap_backing_dev_info, + } }; #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) @@ -53,9 +53,19 @@ static struct { unsigned long find_total; } swap_cache_info; +unsigned long total_swapcache_pages(void) +{ + int i; + unsigned long ret = 0; + + for (i = 0; i < MAX_SWAPFILES; i++) + ret += swapper_spaces[i].nrpages; + return ret; +} + void show_swap_cache_info(void) { - printk("%lu pages in swap cache\n", total_swapcache_pages); + printk("%lu pages in swap cache\n", total_swapcache_pages()); printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", swap_cache_info.add_total, swap_cache_info.del_total, swap_cache_info.find_success, swap_cache_info.find_total); @@ -70,6 +80,7 @@ void show_swap_cache_info(void) static int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; + struct address_space *address_space; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageSwapCache(page)); @@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry) SetPageSwapCache(page); set_page_private(page, entry.val); - spin_lock_irq(&swapper_space.tree_lock); - error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); + address_space = swap_address_space(entry); + spin_lock_irq(&address_space->tree_lock); + error = radix_tree_insert(&address_space->page_tree, + entry.val, page); if (likely(!error)) { - total_swapcache_pages++; + address_space->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } - spin_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&address_space->tree_lock); if (unlikely(error)) { /* @@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { + swp_entry_t entry; + struct address_space *address_space; + VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); - radix_tree_delete(&swapper_space.page_tree, page_private(page)); + entry.val = page_private(page); + address_space = swap_address_space(entry); + radix_tree_delete(&address_space->page_tree, page_private(page)); set_page_private(page, 0); ClearPageSwapCache(page); - total_swapcache_pages--; + address_space->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); } @@ -195,12 +213,14 @@ int add_to_swap(struct page *page) void delete_from_swap_cache(struct page *page) { swp_entry_t entry; + struct address_space *address_space; entry.val = page_private(page); - spin_lock_irq(&swapper_space.tree_lock); + address_space = swap_address_space(entry); + spin_lock_irq(&address_space->tree_lock); __delete_from_swap_cache(page); - spin_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&address_space->tree_lock); swapcache_free(entry, page); page_cache_release(page); @@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry) { struct page *page; - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), entry.val); if (page) INC_CACHE_INFO(find_success); @@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * called after lookup_swap_cache() failed, re-calling * that would confuse statistics. */ - found_page = find_get_page(&swapper_space, entry.val); + found_page = find_get_page(swap_address_space(entry), + entry.val); if (found_page) break; diff --git a/mm/swapfile.c b/mm/swapfile.c index e97a0e5aea91..e51864e6fe8b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) struct page *page; int ret = 0; - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), entry.val); if (!page) return 0; /* @@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry) p = swap_info_get(entry); if (p) { if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), + entry.val); if (page && !trylock_page(page)) { page_cache_release(page); page = NULL; diff --git a/mm/util.c b/mm/util.c index 16a73195a37b..ab1424dbe2e6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "internal.h" @@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page) VM_BUG_ON(PageSlab(page)); #ifdef CONFIG_SWAP - if (unlikely(PageSwapCache(page))) - mapping = &swapper_space; - else + if (unlikely(PageSwapCache(page))) { + swp_entry_t entry; + + entry.val = page_private(page); + mapping = swap_address_space(entry); + } else #endif if ((unsigned long)mapping & PAGE_MAPPING_ANON) mapping = NULL; -- cgit v1.2.3 From ec8acf20afb8534ed511f6613dd2226b9e301010 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 22 Feb 2013 16:34:38 -0800 Subject: swap: add per-partition lock for swapfile swap_lock is heavily contended when I test swap to 3 fast SSD (even slightly slower than swap to 2 such SSD). The main contention comes from swap_info_get(). This patch tries to fix the gap with adding a new per-partition lock. Global data like nr_swapfiles, total_swap_pages, least_priority and swap_list are still protected by swap_lock. nr_swap_pages is an atomic now, it can be changed without swap_lock. In theory, it's possible get_swap_page() finds no swap pages but actually there are free swap pages. But sounds not a big problem. Accessing partition specific data (like scan_swap_map and so on) is only protected by swap_info_struct.lock. Changing swap_info_struct.flags need hold swap_lock and swap_info_struct.lock, because scan_scan_map() will check it. read the flags is ok with either the locks hold. If both swap_lock and swap_info_struct.lock must be hold, we always hold the former first to avoid deadlock. swap_entry_free() can change swap_list. To delete that code, we add a new highest_priority_index. Whenever get_swap_page() is called, we check it. If it's valid, we use it. It's a pity get_swap_page() still holds swap_lock(). But in practice, swap_lock() isn't heavily contended in my test with this patch (or I can say there are other much more heavier bottlenecks like TLB flush). And BTW, looks get_swap_page() doesn't really need the lock. We never free swap_info[] and we check SWAP_WRITEOK flag. The only risk without the lock is we could swapout to some low priority swap, but we can quickly recover after several rounds of swap, so sounds not a big deal to me. But I'd prefer to fix this if it's a real problem. "swap: make each swap partition have one address_space" improved the swapout speed from 1.7G/s to 2G/s. This patch further improves the speed to 2.3G/s, so around 15% improvement. It's a multi-process test, so TLB flush isn't the biggest bottleneck before the patches. [arnd@arndb.de: fix it for nommu] [hughd@google.com: add missing unlock] [minchan@kernel.org: get rid of lockdep whinge on sys_swapon] Signed-off-by: Shaohua Li Cc: Hugh Dickins Cc: Rik van Riel Cc: Minchan Kim Cc: Greg Kroah-Hartman Cc: Seth Jennings Cc: Konrad Rzeszutek Wilk Cc: Xiao Guangrong Cc: Dan Magenheimer Cc: Stephen Rothwell Signed-off-by: Arnd Bergmann Signed-off-by: Hugh Dickins Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/mm/init_32.c | 2 +- arch/tile/mm/pgtable.c | 2 +- include/linux/swap.h | 32 ++++++++-- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/swap_state.c | 3 +- mm/swapfile.c | 154 +++++++++++++++++++++++++++++++++--------------- mm/vmscan.c | 8 +-- 8 files changed, 145 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index dde85ef1c56d..48e0c030e8f5 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -57,7 +57,7 @@ void show_mem(unsigned int filter) printk("Mem-info:\n"); show_free_areas(filter); printk("Free swap: %6ldkB\n", - nr_swap_pages << (PAGE_SHIFT-10)); + get_nr_swap_pages() << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", totalram_pages); printk("%ld free pages\n", nr_free_pages()); } diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index de0de0c0e8a1..b3b4972c2451 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -61,7 +61,7 @@ void show_mem(unsigned int filter) global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE), global_page_state(NR_FILE_PAGES), - nr_swap_pages); + get_nr_swap_pages()); for_each_zone(zone) { unsigned long flags, order, total = 0, largest_order = -1; diff --git a/include/linux/swap.h b/include/linux/swap.h index 235c039892ee..a3e22d357e91 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -202,6 +202,18 @@ struct swap_info_struct { unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ atomic_t frontswap_pages; /* frontswap pages in-use counter */ #endif + spinlock_t lock; /* + * protect map scan related fields like + * swap_map, lowest_bit, highest_bit, + * inuse_pages, cluster_next, + * cluster_nr, lowest_alloc and + * highest_alloc. other fields are only + * changed at swapon/swapoff, so are + * protected by swap_lock. changing + * flags need hold this lock and + * swap_lock. If both locks need hold, + * hold swap_lock first. + */ }; struct swap_list_t { @@ -209,9 +221,6 @@ struct swap_list_t { int next; /* swapfile to be used next */ }; -/* Swap 50% full? Release swapcache more aggressively.. */ -#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) - /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; @@ -347,8 +356,20 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); /* linux/mm/swapfile.c */ -extern long nr_swap_pages; +extern atomic_long_t nr_swap_pages; extern long total_swap_pages; + +/* Swap 50% full? Release swapcache more aggressively.. */ +static inline bool vm_swap_full(void) +{ + return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages; +} + +static inline long get_nr_swap_pages(void) +{ + return atomic_long_read(&nr_swap_pages); +} + extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); @@ -381,9 +402,10 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #else /* CONFIG_SWAP */ -#define nr_swap_pages 0L +#define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL +#define vm_swap_full() 0 #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) diff --git a/mm/mmap.c b/mm/mmap.c index 44bb4d869884..28416f6b8dd5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -144,7 +144,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ free -= global_page_state(NR_SHMEM); - free += nr_swap_pages; + free += get_nr_swap_pages(); /* * Any slabs which are created with the diff --git a/mm/nommu.c b/mm/nommu.c index 18c1b932e2c4..87854a55829d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1907,7 +1907,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ free -= global_page_state(NR_SHMEM); - free += nr_swap_pages; + free += get_nr_swap_pages(); /* * Any slabs which are created with the diff --git a/mm/swap_state.c b/mm/swap_state.c index 8d6644c5d0cc..7efcf1525921 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -69,7 +69,8 @@ void show_swap_cache_info(void) printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", swap_cache_info.add_total, swap_cache_info.del_total, swap_cache_info.find_success, swap_cache_info.find_total); - printk("Free swap = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10)); + printk("Free swap = %ldkB\n", + get_nr_swap_pages() << (PAGE_SHIFT - 10)); printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); } diff --git a/mm/swapfile.c b/mm/swapfile.c index e51864e6fe8b..9b51266413cd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,9 +47,11 @@ static sector_t map_swap_entry(swp_entry_t, struct block_device**); DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; -long nr_swap_pages; +atomic_long_t nr_swap_pages; +/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority; +static atomic_t highest_priority_index = ATOMIC_INIT(-1); static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; @@ -223,7 +225,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, si->lowest_alloc = si->max; si->highest_alloc = 0; } - spin_unlock(&swap_lock); + spin_unlock(&si->lock); /* * If seek is expensive, start searching for new cluster from @@ -242,7 +244,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { - spin_lock(&swap_lock); + spin_lock(&si->lock); offset -= SWAPFILE_CLUSTER - 1; si->cluster_next = offset; si->cluster_nr = SWAPFILE_CLUSTER - 1; @@ -263,7 +265,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { - spin_lock(&swap_lock); + spin_lock(&si->lock); offset -= SWAPFILE_CLUSTER - 1; si->cluster_next = offset; si->cluster_nr = SWAPFILE_CLUSTER - 1; @@ -277,7 +279,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, } offset = scan_base; - spin_lock(&swap_lock); + spin_lock(&si->lock); si->cluster_nr = SWAPFILE_CLUSTER - 1; si->lowest_alloc = 0; } @@ -293,9 +295,9 @@ checks: /* reuse swap entry of cache-only swap if not busy. */ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; - spin_unlock(&swap_lock); + spin_unlock(&si->lock); swap_was_freed = __try_to_reclaim_swap(si, offset); - spin_lock(&swap_lock); + spin_lock(&si->lock); /* entry was freed successfully, try to use this again */ if (swap_was_freed) goto checks; @@ -335,13 +337,13 @@ checks: si->lowest_alloc <= last_in_cluster) last_in_cluster = si->lowest_alloc - 1; si->flags |= SWP_DISCARDING; - spin_unlock(&swap_lock); + spin_unlock(&si->lock); if (offset < last_in_cluster) discard_swap_cluster(si, offset, last_in_cluster - offset + 1); - spin_lock(&swap_lock); + spin_lock(&si->lock); si->lowest_alloc = 0; si->flags &= ~SWP_DISCARDING; @@ -355,10 +357,10 @@ checks: * could defer that delay until swap_writepage, * but it's easier to keep this self-contained. */ - spin_unlock(&swap_lock); + spin_unlock(&si->lock); wait_on_bit(&si->flags, ilog2(SWP_DISCARDING), wait_for_discard, TASK_UNINTERRUPTIBLE); - spin_lock(&swap_lock); + spin_lock(&si->lock); } else { /* * Note pages allocated by racing tasks while @@ -374,14 +376,14 @@ checks: return offset; scan: - spin_unlock(&swap_lock); + spin_unlock(&si->lock); while (++offset <= si->highest_bit) { if (!si->swap_map[offset]) { - spin_lock(&swap_lock); + spin_lock(&si->lock); goto checks; } if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { - spin_lock(&swap_lock); + spin_lock(&si->lock); goto checks; } if (unlikely(--latency_ration < 0)) { @@ -392,11 +394,11 @@ scan: offset = si->lowest_bit; while (++offset < scan_base) { if (!si->swap_map[offset]) { - spin_lock(&swap_lock); + spin_lock(&si->lock); goto checks; } if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { - spin_lock(&swap_lock); + spin_lock(&si->lock); goto checks; } if (unlikely(--latency_ration < 0)) { @@ -404,7 +406,7 @@ scan: latency_ration = LATENCY_LIMIT; } } - spin_lock(&swap_lock); + spin_lock(&si->lock); no_page: si->flags -= SWP_SCANNING; @@ -417,13 +419,34 @@ swp_entry_t get_swap_page(void) pgoff_t offset; int type, next; int wrapped = 0; + int hp_index; spin_lock(&swap_lock); - if (nr_swap_pages <= 0) + if (atomic_long_read(&nr_swap_pages) <= 0) goto noswap; - nr_swap_pages--; + atomic_long_dec(&nr_swap_pages); for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { + hp_index = atomic_xchg(&highest_priority_index, -1); + /* + * highest_priority_index records current highest priority swap + * type which just frees swap entries. If its priority is + * higher than that of swap_list.next swap type, we use it. It + * isn't protected by swap_lock, so it can be an invalid value + * if the corresponding swap type is swapoff. We double check + * the flags here. It's even possible the swap type is swapoff + * and swapon again and its priority is changed. In such rare + * case, low prority swap type might be used, but eventually + * high priority swap will be used after several rounds of + * swap. + */ + if (hp_index != -1 && hp_index != type && + swap_info[type]->prio < swap_info[hp_index]->prio && + (swap_info[hp_index]->flags & SWP_WRITEOK)) { + type = hp_index; + swap_list.next = type; + } + si = swap_info[type]; next = si->next; if (next < 0 || @@ -432,22 +455,29 @@ swp_entry_t get_swap_page(void) wrapped++; } - if (!si->highest_bit) + spin_lock(&si->lock); + if (!si->highest_bit) { + spin_unlock(&si->lock); continue; - if (!(si->flags & SWP_WRITEOK)) + } + if (!(si->flags & SWP_WRITEOK)) { + spin_unlock(&si->lock); continue; + } swap_list.next = next; + + spin_unlock(&swap_lock); /* This is called for allocating swap entry for cache */ offset = scan_swap_map(si, SWAP_HAS_CACHE); - if (offset) { - spin_unlock(&swap_lock); + spin_unlock(&si->lock); + if (offset) return swp_entry(type, offset); - } + spin_lock(&swap_lock); next = swap_list.next; } - nr_swap_pages++; + atomic_long_inc(&nr_swap_pages); noswap: spin_unlock(&swap_lock); return (swp_entry_t) {0}; @@ -459,19 +489,19 @@ swp_entry_t get_swap_page_of_type(int type) struct swap_info_struct *si; pgoff_t offset; - spin_lock(&swap_lock); si = swap_info[type]; + spin_lock(&si->lock); if (si && (si->flags & SWP_WRITEOK)) { - nr_swap_pages--; + atomic_long_dec(&nr_swap_pages); /* This is called for allocating swap entry, not cache */ offset = scan_swap_map(si, 1); if (offset) { - spin_unlock(&swap_lock); + spin_unlock(&si->lock); return swp_entry(type, offset); } - nr_swap_pages++; + atomic_long_inc(&nr_swap_pages); } - spin_unlock(&swap_lock); + spin_unlock(&si->lock); return (swp_entry_t) {0}; } @@ -493,7 +523,7 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry) goto bad_offset; if (!p->swap_map[offset]) goto bad_free; - spin_lock(&swap_lock); + spin_lock(&p->lock); return p; bad_free: @@ -511,6 +541,27 @@ out: return NULL; } +/* + * This swap type frees swap entry, check if it is the highest priority swap + * type which just frees swap entry. get_swap_page() uses + * highest_priority_index to search highest priority swap type. The + * swap_info_struct.lock can't protect us if there are multiple swap types + * active, so we use atomic_cmpxchg. + */ +static void set_highest_priority_index(int type) +{ + int old_hp_index, new_hp_index; + + do { + old_hp_index = atomic_read(&highest_priority_index); + if (old_hp_index != -1 && + swap_info[old_hp_index]->prio >= swap_info[type]->prio) + break; + new_hp_index = type; + } while (atomic_cmpxchg(&highest_priority_index, + old_hp_index, new_hp_index) != old_hp_index); +} + static unsigned char swap_entry_free(struct swap_info_struct *p, swp_entry_t entry, unsigned char usage) { @@ -553,10 +604,8 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, p->lowest_bit = offset; if (offset > p->highest_bit) p->highest_bit = offset; - if (swap_list.next >= 0 && - p->prio > swap_info[swap_list.next]->prio) - swap_list.next = p->type; - nr_swap_pages++; + set_highest_priority_index(p->type); + atomic_long_inc(&nr_swap_pages); p->inuse_pages--; frontswap_invalidate_page(p->type, offset); if (p->flags & SWP_BLKDEV) { @@ -581,7 +630,7 @@ void swap_free(swp_entry_t entry) p = swap_info_get(entry); if (p) { swap_entry_free(p, entry, 1); - spin_unlock(&swap_lock); + spin_unlock(&p->lock); } } @@ -598,7 +647,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) count = swap_entry_free(p, entry, SWAP_HAS_CACHE); if (page) mem_cgroup_uncharge_swapcache(page, entry, count != 0); - spin_unlock(&swap_lock); + spin_unlock(&p->lock); } } @@ -617,7 +666,7 @@ int page_swapcount(struct page *page) p = swap_info_get(entry); if (p) { count = swap_count(p->swap_map[swp_offset(entry)]); - spin_unlock(&swap_lock); + spin_unlock(&p->lock); } return count; } @@ -706,7 +755,7 @@ int free_swap_and_cache(swp_entry_t entry) page = NULL; } } - spin_unlock(&swap_lock); + spin_unlock(&p->lock); } if (page) { /* @@ -804,11 +853,13 @@ unsigned int count_swap_pages(int type, int free) if ((unsigned int)type < nr_swapfiles) { struct swap_info_struct *sis = swap_info[type]; + spin_lock(&sis->lock); if (sis->flags & SWP_WRITEOK) { n = sis->pages; if (free) n -= sis->inuse_pages; } + spin_unlock(&sis->lock); } spin_unlock(&swap_lock); return n; @@ -1457,7 +1508,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio, p->swap_map = swap_map; frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; - nr_swap_pages += p->pages; + atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; /* insert swap space into swap_list: */ @@ -1479,15 +1530,19 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned long *frontswap_map) { spin_lock(&swap_lock); + spin_lock(&p->lock); _enable_swap_info(p, prio, swap_map, frontswap_map); frontswap_init(p->type); + spin_unlock(&p->lock); spin_unlock(&swap_lock); } static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); + spin_lock(&p->lock); _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); + spin_unlock(&p->lock); spin_unlock(&swap_lock); } @@ -1547,14 +1602,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) /* just pick something that's safe... */ swap_list.next = swap_list.head; } + spin_lock(&p->lock); if (p->prio < 0) { for (i = p->next; i >= 0; i = swap_info[i]->next) swap_info[i]->prio = p->prio--; least_priority++; } - nr_swap_pages -= p->pages; + atomic_long_sub(p->pages, &nr_swap_pages); total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; + spin_unlock(&p->lock); spin_unlock(&swap_lock); set_current_oom_origin(); @@ -1573,14 +1630,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) mutex_lock(&swapon_mutex); spin_lock(&swap_lock); + spin_lock(&p->lock); drain_mmlist(); /* wait for anyone still in scan_swap_map */ p->highest_bit = 0; /* cuts scans short */ while (p->flags >= SWP_SCANNING) { + spin_unlock(&p->lock); spin_unlock(&swap_lock); schedule_timeout_uninterruptible(1); spin_lock(&swap_lock); + spin_lock(&p->lock); } swap_file = p->swap_file; @@ -1590,6 +1650,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->swap_map = NULL; p->flags = 0; frontswap_invalidate_area(type); + spin_unlock(&p->lock); spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); @@ -1795,6 +1856,7 @@ static struct swap_info_struct *alloc_swap_info(void) p->flags = SWP_USED; p->next = -1; spin_unlock(&swap_lock); + spin_lock_init(&p->lock); return p; } @@ -2117,7 +2179,7 @@ void si_swapinfo(struct sysinfo *val) if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) nr_to_be_unused += si->inuse_pages; } - val->freeswap = nr_swap_pages + nr_to_be_unused; + val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); } @@ -2150,7 +2212,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) p = swap_info[type]; offset = swp_offset(entry); - spin_lock(&swap_lock); + spin_lock(&p->lock); if (unlikely(offset >= p->max)) goto unlock_out; @@ -2185,7 +2247,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) p->swap_map[offset] = count | has_cache; unlock_out: - spin_unlock(&swap_lock); + spin_unlock(&p->lock); out: return err; @@ -2310,7 +2372,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) } if (!page) { - spin_unlock(&swap_lock); + spin_unlock(&si->lock); return -ENOMEM; } @@ -2358,7 +2420,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out: - spin_unlock(&swap_lock); + spin_unlock(&si->lock); outer: if (page) __free_page(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index a68fa20269d9..b7d8015a6d54 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1684,7 +1684,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ - if (!sc->may_swap || (nr_swap_pages <= 0)) { + if (!sc->may_swap || (get_nr_swap_pages() <= 0)) { scan_balance = SCAN_FILE; goto out; } @@ -1933,7 +1933,7 @@ static inline bool should_continue_reclaim(struct zone *zone, */ pages_for_compaction = (2UL << sc->order); inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE); - if (nr_swap_pages > 0) + if (get_nr_swap_pages() > 0) inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) @@ -3085,7 +3085,7 @@ unsigned long global_reclaimable_pages(void) nr = global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_FILE); - if (nr_swap_pages > 0) + if (get_nr_swap_pages() > 0) nr += global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_INACTIVE_ANON); @@ -3099,7 +3099,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone) nr = zone_page_state(zone, NR_ACTIVE_FILE) + zone_page_state(zone, NR_INACTIVE_FILE); - if (nr_swap_pages > 0) + if (get_nr_swap_pages() > 0) nr += zone_page_state(zone, NR_ACTIVE_ANON) + zone_page_state(zone, NR_INACTIVE_ANON); -- cgit v1.2.3 From 08b52706d505658eac0962d215ff697f898bbc13 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 22 Feb 2013 16:34:40 -0800 Subject: mm/rmap: rename anon_vma_unlock() => anon_vma_unlock_write() The comment in commit 4fc3f1d66b1e ("mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable") says: | Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(), | to make it clearer that it's an exclusive write-lock in | that case - suggested by Rik van Riel. But that commit renames only anon_vma_lock() Signed-off-by: Konstantin Khlebnikov Cc: Ingo Molnar Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- include/linux/rmap.h | 2 +- mm/huge_memory.c | 6 +++--- mm/mmap.c | 4 ++-- mm/mremap.c | 2 +- mm/rmap.c | 6 +++--- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1d76f8ca90f0..ee1c244a62a1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -113,7 +113,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, do { \ pmd_t *____pmd = (__pmd); \ anon_vma_lock_write(__anon_vma); \ - anon_vma_unlock(__anon_vma); \ + anon_vma_unlock_write(__anon_vma); \ BUG_ON(pmd_trans_splitting(*____pmd) || \ pmd_trans_huge(*____pmd)); \ } while (0) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c20635c527a9..6dacb93a6d94 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -123,7 +123,7 @@ static inline void anon_vma_lock_write(struct anon_vma *anon_vma) down_write(&anon_vma->root->rwsem); } -static inline void anon_vma_unlock(struct anon_vma *anon_vma) +static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { up_write(&anon_vma->root->rwsem); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f40b2ce23d60..b1cc6591ed83 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1830,7 +1830,7 @@ int split_huge_page(struct page *page) BUG_ON(PageCompound(page)); out_unlock: - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); put_anon_vma(anon_vma); out: return ret; @@ -2322,7 +2322,7 @@ static void collapse_huge_page(struct mm_struct *mm, BUG_ON(!pmd_none(*pmd)); set_pmd_at(mm, address, pmd, _pmd); spin_unlock(&mm->page_table_lock); - anon_vma_unlock(vma->anon_vma); + anon_vma_unlock_write(vma->anon_vma); goto out; } @@ -2330,7 +2330,7 @@ static void collapse_huge_page(struct mm_struct *mm, * All pages are isolated and locked so anon_vma rmap * can't run anymore. */ - anon_vma_unlock(vma->anon_vma); + anon_vma_unlock_write(vma->anon_vma); __collapse_huge_page_copy(pte, new_page, vma, address, ptl); pte_unmap(pte); diff --git a/mm/mmap.c b/mm/mmap.c index 28416f6b8dd5..318e121affda 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -809,7 +809,7 @@ again: remove_next = 1 + (end > next->vm_end); anon_vma_interval_tree_post_update_vma(vma); if (adjust_next) anon_vma_interval_tree_post_update_vma(next); - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); } if (mapping) mutex_unlock(&mapping->i_mmap_mutex); @@ -3017,7 +3017,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) BUG(); - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); } } diff --git a/mm/mremap.c b/mm/mremap.c index ebe27a8efa62..463a25705ac6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -135,7 +135,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap(new_pte - 1); pte_unmap_unlock(old_pte - 1, old_ptl); if (anon_vma) - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); if (mapping) mutex_unlock(&mapping->i_mmap_mutex); } diff --git a/mm/rmap.c b/mm/rmap.c index 3d38edffda41..807c96bf0dc6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -105,7 +105,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) */ if (rwsem_is_locked(&anon_vma->root->rwsem)) { anon_vma_lock_write(anon_vma); - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); } kmem_cache_free(anon_vma_cachep, anon_vma); @@ -191,7 +191,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) avc = NULL; } spin_unlock(&mm->page_table_lock); - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); if (unlikely(allocated)) put_anon_vma(allocated); @@ -308,7 +308,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); - anon_vma_unlock(anon_vma); + anon_vma_unlock_write(anon_vma); return 0; -- cgit v1.2.3 From 75f7ad8e043d9383337d917584297f7737154bbf Mon Sep 17 00:00:00 2001 From: Paul Szabo Date: Fri, 22 Feb 2013 16:34:42 -0800 Subject: page-writeback.c: subtract min_free_kbytes from dirtyable memory When calculating amount of dirtyable memory, min_free_kbytes should be subtracted because it is not intended for dirty pages. Addresses http://bugs.debian.org/695182 [akpm@linux-foundation.org: fix up min_free_kbytes extern declarations] [akpm@linux-foundation.org: fix min() warning] Signed-off-by: Paul Szabo Acked-by: Rik van Riel Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ kernel/sysctl.c | 1 - mm/huge_memory.c | 1 - mm/page-writeback.c | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d4122bf6f27..437da0ce78c7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1393,6 +1393,9 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); +/* page_alloc.c */ +extern int min_free_kbytes; + /* nommu.c */ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 467d8b923fcd..95e9e55602a8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -105,7 +105,6 @@ extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif extern int pid_max; -extern int min_free_kbytes; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b1cc6591ed83..c63a21d0e991 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -105,7 +105,6 @@ static int set_recommended_min_free_kbytes(void) struct zone *zone; int nr_zones = 0; unsigned long recommended_min; - extern int min_free_kbytes; if (!khugepaged_enabled()) return 0; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7300c9d5e1d9..cdc377c456c0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -241,6 +241,9 @@ static unsigned long global_dirtyable_memory(void) if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); + /* Subtract min_free_kbytes */ + x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10)); + return x + 1; /* Ensure that we never return 0 */ } -- cgit v1.2.3 From 4468b8f1e2d32ce79ef4bcb8e00d7e88627f1c3a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 22 Feb 2013 16:34:46 -0800 Subject: mm: uninline page_xchg_last_nid() Andrew Morton pointed out that page_xchg_last_nid() and reset_page_last_nid() were "getting nuttily large" and asked that it be investigated. reset_page_last_nid() is on the page free path and it would be unfortunate to make that path more expensive than it needs to be. Due to the internal use of page_xchg_last_nid() it is already too expensive but fortunately, it should also be impossible for the page->flags to be updated in parallel when we call reset_page_last_nid(). Instead of unlining the function, it uses a simplier implementation that assumes no parallel updates and should now be sufficiently short for inlining. page_xchg_last_nid() is called in paths that are already quite expensive (splitting huge page, fault handling, migration) and it is reasonable to uninline. There was not really a good place to place the function but mm/mmzone.c was the closest fit IMO. This patch saved 128 bytes of text in the vmlinux file for the kernel configuration I used for testing automatic NUMA balancing. Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++---------------- mm/mmzone.c | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 437da0ce78c7..8a5bbe3b9e56 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -677,25 +677,14 @@ static inline int page_last_nid(struct page *page) return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; } -static inline int page_xchg_last_nid(struct page *page, int nid) -{ - unsigned long old_flags, flags; - int last_nid; - - do { - old_flags = flags = page->flags; - last_nid = page_last_nid(page); - - flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); - flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; - } while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags)); - - return last_nid; -} +extern int page_xchg_last_nid(struct page *page, int nid); static inline void reset_page_last_nid(struct page *page) { - page_xchg_last_nid(page, (1 << LAST_NID_SHIFT) - 1); + int nid = (1 << LAST_NID_SHIFT) - 1; + + page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); + page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; } #endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ #else diff --git a/mm/mmzone.c b/mm/mmzone.c index 4596d81b89b1..bce796e8487f 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -1,7 +1,7 @@ /* * linux/mm/mmzone.c * - * management codes for pgdats and zones. + * management codes for pgdats, zones and page flags */ @@ -96,3 +96,21 @@ void lruvec_init(struct lruvec *lruvec) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); } + +#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS) +int page_xchg_last_nid(struct page *page, int nid) +{ + unsigned long old_flags, flags; + int last_nid; + + do { + old_flags = flags = page->flags; + last_nid = page_last_nid(page); + + flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); + flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; + } while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags)); + + return last_nid; +} +#endif -- cgit v1.2.3 From 22b751c3d0376e86a377e3a0aa2ddbbe9d2eefc1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 22 Feb 2013 16:34:59 -0800 Subject: mm: rename page struct field helpers The function names page_xchg_last_nid(), page_last_nid() and reset_page_last_nid() were judged to be inconsistent so rename them to a struct_field_op style pattern. As it looked jarring to have reset_page_mapcount() and page_nid_reset_last() beside each other in memmap_init_zone(), this patch also renames reset_page_mapcount() to page_mapcount_reset(). There are others like init_page_count() but as it is used throughout the arch code a rename would likely cause more conflicts than it is worth. [akpm@linux-foundation.org: fix zcache] Signed-off-by: Mel Gorman Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/zcache/zbud.c | 2 +- drivers/staging/zsmalloc/zsmalloc-main.c | 2 +- include/linux/mm.h | 20 ++++++++++---------- mm/huge_memory.c | 2 +- mm/mempolicy.c | 2 +- mm/migrate.c | 4 ++-- mm/mmzone.c | 4 ++-- mm/page_alloc.c | 10 +++++----- mm/slob.c | 2 +- mm/slub.c | 2 +- 10 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/zcache/zbud.c b/drivers/staging/zcache/zbud.c index 328c397ea5dc..fdff5c6a0239 100644 --- a/drivers/staging/zcache/zbud.c +++ b/drivers/staging/zcache/zbud.c @@ -404,7 +404,7 @@ static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage, else zbud_pers_pageframes--; zbudpage_spin_unlock(zbudpage); - reset_page_mapcount(page); + page_mapcount_reset(page); init_page_count(page); page->index = 0; return page; diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c index 06f73a93a44d..e78d262c5249 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/drivers/staging/zsmalloc/zsmalloc-main.c @@ -472,7 +472,7 @@ static void reset_page(struct page *page) set_page_private(page, 0); page->mapping = NULL; page->freelist = NULL; - reset_page_mapcount(page); + page_mapcount_reset(page); } static void free_zspage(struct page *first_page) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a5bbe3b9e56..fe039bdba4ed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -367,7 +367,7 @@ static inline struct page *compound_head(struct page *page) * both from it and to it can be tracked, using atomic_inc_and_test * and atomic_add_negative(-1). */ -static inline void reset_page_mapcount(struct page *page) +static inline void page_mapcount_reset(struct page *page) { atomic_set(&(page)->_mapcount, -1); } @@ -658,28 +658,28 @@ static inline int page_to_nid(const struct page *page) #ifdef CONFIG_NUMA_BALANCING #ifdef LAST_NID_NOT_IN_PAGE_FLAGS -static inline int page_xchg_last_nid(struct page *page, int nid) +static inline int page_nid_xchg_last(struct page *page, int nid) { return xchg(&page->_last_nid, nid); } -static inline int page_last_nid(struct page *page) +static inline int page_nid_last(struct page *page) { return page->_last_nid; } -static inline void reset_page_last_nid(struct page *page) +static inline void page_nid_reset_last(struct page *page) { page->_last_nid = -1; } #else -static inline int page_last_nid(struct page *page) +static inline int page_nid_last(struct page *page) { return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; } -extern int page_xchg_last_nid(struct page *page, int nid); +extern int page_nid_xchg_last(struct page *page, int nid); -static inline void reset_page_last_nid(struct page *page) +static inline void page_nid_reset_last(struct page *page) { int nid = (1 << LAST_NID_SHIFT) - 1; @@ -688,17 +688,17 @@ static inline void reset_page_last_nid(struct page *page) } #endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ #else -static inline int page_xchg_last_nid(struct page *page, int nid) +static inline int page_nid_xchg_last(struct page *page, int nid) { return page_to_nid(page); } -static inline int page_last_nid(struct page *page) +static inline int page_nid_last(struct page *page) { return page_to_nid(page); } -static inline void reset_page_last_nid(struct page *page) +static inline void page_nid_reset_last(struct page *page) { } #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c63a21d0e991..6049376c7226 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1639,7 +1639,7 @@ static void __split_huge_page_refcount(struct page *page) page_tail->mapping = page->mapping; page_tail->index = page->index + i; - page_xchg_last_nid(page_tail, page_last_nid(page)); + page_nid_xchg_last(page_tail, page_nid_last(page)); BUG_ON(!PageAnon(page_tail)); BUG_ON(!PageUptodate(page_tail)); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6f7979c566d9..2ae78e255e08 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2316,7 +2316,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long * it less likely we act on an unlikely task<->page * relation. */ - last_nid = page_xchg_last_nid(page, polnid); + last_nid = page_nid_xchg_last(page, polnid); if (last_nid != polnid) goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index f560071e89c5..de5c371a7969 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1497,7 +1497,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, __GFP_NOWARN) & ~GFP_IOFS, 0); if (newpage) - page_xchg_last_nid(newpage, page_last_nid(page)); + page_nid_xchg_last(newpage, page_nid_last(page)); return newpage; } @@ -1681,7 +1681,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, if (!new_page) goto out_fail; - page_xchg_last_nid(new_page, page_last_nid(page)); + page_nid_xchg_last(new_page, page_nid_last(page)); isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) { diff --git a/mm/mmzone.c b/mm/mmzone.c index bce796e8487f..2ac0afbd68f3 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -98,14 +98,14 @@ void lruvec_init(struct lruvec *lruvec) } #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS) -int page_xchg_last_nid(struct page *page, int nid) +int page_nid_xchg_last(struct page *page, int nid) { unsigned long old_flags, flags; int last_nid; do { old_flags = flags = page->flags; - last_nid = page_last_nid(page); + last_nid = page_nid_last(page); flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3ede25e6686e..445718b328b6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -295,7 +295,7 @@ static void bad_page(struct page *page) /* Don't complain about poisoned pages */ if (PageHWPoison(page)) { - reset_page_mapcount(page); /* remove PageBuddy */ + page_mapcount_reset(page); /* remove PageBuddy */ return; } @@ -327,7 +327,7 @@ static void bad_page(struct page *page) dump_stack(); out: /* Leave bad fields for debug, except PageBuddy could make trouble */ - reset_page_mapcount(page); /* remove PageBuddy */ + page_mapcount_reset(page); /* remove PageBuddy */ add_taint(TAINT_BAD_PAGE); } @@ -613,7 +613,7 @@ static inline int free_pages_check(struct page *page) bad_page(page); return 1; } - reset_page_last_nid(page); + page_nid_reset_last(page); if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; return 0; @@ -3894,8 +3894,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_page_links(page, zone, nid, pfn); mminit_verify_page_links(page, zone, nid, pfn); init_page_count(page); - reset_page_mapcount(page); - reset_page_last_nid(page); + page_mapcount_reset(page); + page_nid_reset_last(page); SetPageReserved(page); /* * Mark the block movable so that blocks are reserved for diff --git a/mm/slob.c b/mm/slob.c index a99fdf7a0907..eeed4a05a2ef 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -360,7 +360,7 @@ static void slob_free(void *block, int size) clear_slob_page_free(sp); spin_unlock_irqrestore(&slob_lock, flags); __ClearPageSlab(sp); - reset_page_mapcount(sp); + page_mapcount_reset(sp); slob_free_pages(b, 0); return; } diff --git a/mm/slub.c b/mm/slub.c index ba2ca53f6c3a..ebcc44eb43b9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1408,7 +1408,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlab(page); memcg_release_pages(s, order); - reset_page_mapcount(page); + page_mapcount_reset(page); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; __free_memcg_kmem_pages(page, order); -- cgit v1.2.3 From cbf86cfe04a66471f23b9e62e5eba4e525f38855 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 22 Feb 2013 16:35:08 -0800 Subject: ksm: remove old stable nodes more thoroughly Switching merge_across_nodes after running KSM is liable to oops on stale nodes still left over from the previous stable tree. It's not something that people will often want to do, but it would be lame to demand a reboot when they're trying to determine which merge_across_nodes setting is best. How can this happen? We only permit switching merge_across_nodes when pages_shared is 0, and usually set run 2 to force that beforehand, which ought to unmerge everything: yet oopses still occur when you then run 1. Three causes: 1. The old stable tree (built according to the inverse merge_across_nodes) has not been fully torn down. A stable node lingers until get_ksm_page() notices that the page it references no longer references it: but the page is not necessarily freed as soon as expected, particularly when swapcache. Fix this with a pass through the old stable tree, applying get_ksm_page() to each of the remaining nodes (most found stale and removed immediately), with forced removal of any left over. Unless the page is still mapped: I've not seen that case, it shouldn't occur, but better to WARN_ON_ONCE and EBUSY than BUG. 2. __ksm_enter() has a nice little optimization, to insert the new mm just behind ksmd's cursor, so there's a full pass for it to stabilize (or be removed) before ksmd addresses it. Nice when ksmd is running, but not so nice when we're trying to unmerge all mms: we were missing those mms forked and inserted behind the unmerge cursor. Easily fixed by inserting at the end when KSM_RUN_UNMERGE. 3. It is possible for a KSM page to be faulted back from swapcache into an mm, just after unmerge_and_remove_all_rmap_items() scanned past it. Fix this by copying on fault when KSM_RUN_UNMERGE: but that is private to ksm.c, so dissolve the distinction between ksm_might_need_to_copy() and ksm_does_need_to_copy(), doing it all in the one call into ksm.c. A long outstanding, unrelated bugfix sneaks in with that third fix: ksm_does_need_to_copy() would copy from a !PageUptodate page (implying I/O error when read in from swap) to a page which it then marks Uptodate. Fix this case by not copying, letting do_swap_page() discover the error. Signed-off-by: Hugh Dickins Cc: Rik van Riel Cc: Petr Holasek Cc: Andrea Arcangeli Cc: Izik Eidus Cc: Gerald Schaefer Cc: KOSAKI Motohiro Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 18 +++--------- mm/ksm.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++--- mm/memory.c | 19 ++++++------ 3 files changed, 92 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 3319a6967626..45c9b6a17bcb 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,9 +16,6 @@ struct stable_node; struct mem_cgroup; -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); - #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -73,15 +70,8 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? */ -static inline int ksm_might_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address) -{ - struct anon_vma *anon_vma = page_anon_vma(page); - - return anon_vma && - (anon_vma->root != vma->anon_vma->root || - page->index != linear_page_index(vma, address)); -} +struct page *ksm_might_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, unsigned long *vm_flags); @@ -113,10 +103,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline int ksm_might_need_to_copy(struct page *page, +static inline struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return 0; + return page; } static inline int page_referenced_ksm(struct page *page, diff --git a/mm/ksm.c b/mm/ksm.c index e02430fb26f6..4c22cdff02ad 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -644,6 +644,57 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, /* * Only called through the sysfs control interface: */ +static int remove_stable_node(struct stable_node *stable_node) +{ + struct page *page; + int err; + + page = get_ksm_page(stable_node, true); + if (!page) { + /* + * get_ksm_page did remove_node_from_stable_tree itself. + */ + return 0; + } + + if (WARN_ON_ONCE(page_mapped(page))) + err = -EBUSY; + else { + /* + * This page might be in a pagevec waiting to be freed, + * or it might be PageSwapCache (perhaps under writeback), + * or it might have been removed from swapcache a moment ago. + */ + set_page_stable_node(page, NULL); + remove_node_from_stable_tree(stable_node); + err = 0; + } + + unlock_page(page); + put_page(page); + return err; +} + +static int remove_all_stable_nodes(void) +{ + struct stable_node *stable_node; + int nid; + int err = 0; + + for (nid = 0; nid < nr_node_ids; nid++) { + while (root_stable_tree[nid].rb_node) { + stable_node = rb_entry(root_stable_tree[nid].rb_node, + struct stable_node, node); + if (remove_stable_node(stable_node)) { + err = -EBUSY; + break; /* proceed to next nid */ + } + cond_resched(); + } + } + return err; +} + static int unmerge_and_remove_all_rmap_items(void) { struct mm_slot *mm_slot; @@ -691,6 +742,8 @@ static int unmerge_and_remove_all_rmap_items(void) } } + /* Clean up stable nodes, but don't worry if some are still busy */ + remove_all_stable_nodes(); ksm_scan.seqnr = 0; return 0; @@ -1586,11 +1639,19 @@ int __ksm_enter(struct mm_struct *mm) spin_lock(&ksm_mmlist_lock); insert_to_mm_slots_hash(mm, mm_slot); /* - * Insert just behind the scanning cursor, to let the area settle + * When KSM_RUN_MERGE (or KSM_RUN_STOP), + * insert just behind the scanning cursor, to let the area settle * down a little; when fork is followed by immediate exec, we don't * want ksmd to waste time setting up and tearing down an rmap_list. + * + * But when KSM_RUN_UNMERGE, it's important to insert ahead of its + * scanning cursor, otherwise KSM pages in newly forked mms will be + * missed: then we might as well insert at the end of the list. */ - list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list); + if (ksm_run & KSM_RUN_UNMERGE) + list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list); + else + list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list); spin_unlock(&ksm_mmlist_lock); set_bit(MMF_VM_MERGEABLE, &mm->flags); @@ -1640,11 +1701,25 @@ void __ksm_exit(struct mm_struct *mm) } } -struct page *ksm_does_need_to_copy(struct page *page, +struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { + struct anon_vma *anon_vma = page_anon_vma(page); struct page *new_page; + if (PageKsm(page)) { + if (page_stable_node(page) && + !(ksm_run & KSM_RUN_UNMERGE)) + return page; /* no need to copy it */ + } else if (!anon_vma) { + return page; /* no need to copy it */ + } else if (anon_vma->root == vma->anon_vma->root && + page->index == linear_page_index(vma, address)) { + return page; /* still no need to copy it */ + } + if (!PageUptodate(page)) + return page; /* let do_swap_page report the error */ + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -2024,7 +2099,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj, mutex_lock(&ksm_thread_mutex); if (ksm_merge_across_nodes != knob) { - if (ksm_pages_shared) + if (ksm_pages_shared || remove_all_stable_nodes()) err = -EBUSY; else ksm_merge_across_nodes = knob; diff --git a/mm/memory.c b/mm/memory.c index 054250ee4a68..7bd22a621817 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2994,17 +2994,16 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) goto out_page; - if (ksm_might_need_to_copy(page, vma, address)) { - swapcache = page; - page = ksm_does_need_to_copy(page, vma, address); - - if (unlikely(!page)) { - ret = VM_FAULT_OOM; - page = swapcache; - swapcache = NULL; - goto out_page; - } + swapcache = page; + page = ksm_might_need_to_copy(page, vma, address); + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; } + if (page == swapcache) + swapcache = NULL; if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { ret = VM_FAULT_OOM; -- cgit v1.2.3 From 9c620e2bc5aa4256c102ada34e6c76204ed5898b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 22 Feb 2013 16:35:14 -0800 Subject: mm: remove offlining arg to migrate_pages No functional change, but the only purpose of the offlining argument to migrate_pages() etc, was to ensure that __unmap_and_move() could migrate a KSM page for memory hotremove (which took ksm_thread_mutex) but not for other callers. Now all cases are safe, remove the arg. Signed-off-by: Hugh Dickins Cc: Rik van Riel Cc: Petr Holasek Cc: Andrea Arcangeli Cc: Izik Eidus Cc: Gerald Schaefer Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 14 ++++++-------- mm/compaction.c | 2 +- mm/memory-failure.c | 7 +++---- mm/memory_hotplug.c | 3 +-- mm/mempolicy.c | 8 +++----- mm/migrate.c | 35 +++++++++++++---------------------- mm/page_alloc.c | 6 ++---- 7 files changed, 29 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 1e9f627967a3..a405d3dc0f61 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -40,11 +40,9 @@ extern void putback_movable_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, bool offlining, - enum migrate_mode mode, int reason); + unsigned long private, enum migrate_mode mode, int reason); extern int migrate_huge_page(struct page *, new_page_t x, - unsigned long private, bool offlining, - enum migrate_mode mode); + unsigned long private, enum migrate_mode mode); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -62,11 +60,11 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, bool offlining, - enum migrate_mode mode, int reason) { return -ENOSYS; } + unsigned long private, enum migrate_mode mode, int reason) + { return -ENOSYS; } static inline int migrate_huge_page(struct page *page, new_page_t x, - unsigned long private, bool offlining, - enum migrate_mode mode) { return -ENOSYS; } + unsigned long private, enum migrate_mode mode) + { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } diff --git a/mm/compaction.c b/mm/compaction.c index ef53f352b606..25e75e3e2ac6 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -980,7 +980,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)cc, false, + (unsigned long)cc, cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC, MR_COMPACTION); update_nr_listpages(cc); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 1a56d63adf9c..e4683459ab26 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1465,7 +1465,7 @@ static int soft_offline_huge_page(struct page *page, int flags) unlock_page(hpage); /* Keep page count to indicate a given hugepage is isolated. */ - ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, + ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, MIGRATE_SYNC); put_page(hpage); if (ret) { @@ -1597,11 +1597,10 @@ static int __soft_offline_page(struct page *page, int flags) if (!ret) { LIST_HEAD(pagelist); inc_zone_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, - false, MIGRATE_SYNC, - MR_MEMORY_FAILURE); + MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { putback_lru_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 77a6abfe3291..dda1ca695a08 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1286,8 +1286,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) * migrate_pages returns # of failed pages. */ ret = migrate_pages(&source, alloc_migrate_target, 0, - true, MIGRATE_SYNC, - MR_MEMORY_HOTPLUG); + MIGRATE_SYNC, MR_MEMORY_HOTPLUG); if (ret) putback_lru_pages(&source); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d344c36db63f..e2661d1c5c33 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1014,8 +1014,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_node_page, dest, - false, MIGRATE_SYNC, - MR_SYSCALL); + MIGRATE_SYNC, MR_SYSCALL); if (err) putback_lru_pages(&pagelist); } @@ -1259,9 +1258,8 @@ static long do_mbind(unsigned long start, unsigned long len, if (!list_empty(&pagelist)) { WARN_ON_ONCE(flags & MPOL_MF_LAZY); nr_failed = migrate_pages(&pagelist, new_vma_page, - (unsigned long)vma, - false, MIGRATE_SYNC, - MR_MEMPOLICY_MBIND); + (unsigned long)vma, + MIGRATE_SYNC, MR_MEMPOLICY_MBIND); if (nr_failed) putback_lru_pages(&pagelist); } diff --git a/mm/migrate.c b/mm/migrate.c index 20a03eb0667f..3bbaf5d230b0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -701,7 +701,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, } static int __unmap_and_move(struct page *page, struct page *newpage, - int force, bool offlining, enum migrate_mode mode) + int force, enum migrate_mode mode) { int rc = -EAGAIN; int remap_swapcache = 1; @@ -847,8 +847,7 @@ out: * to the newly allocated page in newpage. */ static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, - enum migrate_mode mode) + struct page *page, int force, enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -866,7 +865,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (unlikely(split_huge_page(page))) goto out; - rc = __unmap_and_move(page, newpage, force, offlining, mode); + rc = __unmap_and_move(page, newpage, force, mode); if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) { /* @@ -926,8 +925,7 @@ out: */ static int unmap_and_move_huge_page(new_page_t get_new_page, unsigned long private, struct page *hpage, - int force, bool offlining, - enum migrate_mode mode) + int force, enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -989,9 +987,8 @@ out: * * Return: Number of pages not migrated or error code. */ -int migrate_pages(struct list_head *from, - new_page_t get_new_page, unsigned long private, bool offlining, - enum migrate_mode mode, int reason) +int migrate_pages(struct list_head *from, new_page_t get_new_page, + unsigned long private, enum migrate_mode mode, int reason) { int retry = 1; int nr_failed = 0; @@ -1012,8 +1009,7 @@ int migrate_pages(struct list_head *from, cond_resched(); rc = unmap_and_move(get_new_page, private, - page, pass > 2, offlining, - mode); + page, pass > 2, mode); switch(rc) { case -ENOMEM: @@ -1046,15 +1042,13 @@ out: } int migrate_huge_page(struct page *hpage, new_page_t get_new_page, - unsigned long private, bool offlining, - enum migrate_mode mode) + unsigned long private, enum migrate_mode mode) { int pass, rc; for (pass = 0; pass < 10; pass++) { - rc = unmap_and_move_huge_page(get_new_page, - private, hpage, pass > 2, offlining, - mode); + rc = unmap_and_move_huge_page(get_new_page, private, + hpage, pass > 2, mode); switch (rc) { case -ENOMEM: goto out; @@ -1177,8 +1171,7 @@ set_status: err = 0; if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, new_page_node, - (unsigned long)pm, 0, MIGRATE_SYNC, - MR_SYSCALL); + (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_lru_pages(&pagelist); } @@ -1613,10 +1606,8 @@ int migrate_misplaced_page(struct page *page, int node) goto out; list_add(&page->lru, &migratepages); - nr_remaining = migrate_pages(&migratepages, - alloc_misplaced_dst_page, - node, false, MIGRATE_ASYNC, - MR_NUMA_MISPLACED); + nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, + node, MIGRATE_ASYNC, MR_NUMA_MISPLACED); if (nr_remaining) { putback_lru_pages(&migratepages); isolated = 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 445718b328b6..64c83a8c3220 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6084,10 +6084,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, &cc->migratepages); cc->nr_migratepages -= nr_reclaimed; - ret = migrate_pages(&cc->migratepages, - alloc_migrate_target, - 0, false, MIGRATE_SYNC, - MR_CMA); + ret = migrate_pages(&cc->migratepages, alloc_migrate_target, + 0, MIGRATE_SYNC, MR_CMA); } if (ret < 0) { putback_movable_pages(&cc->migratepages); -- cgit v1.2.3 From e3790144c9091631a18564aa64db8a971da02c41 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 22 Feb 2013 16:35:19 -0800 Subject: mm: refactor inactive_file_is_low() to use get_lru_size() An inactive file list is considered low when its active counterpart is bigger, regardless of whether it is a global zone LRU list or a memcg zone LRU list. The only difference is in how the LRU size is assessed. get_lru_size() does the right thing for both global and memcg reclaim situations. Get rid of inactive_file_is_low_global() and mem_cgroup_inactive_file_is_low() by using get_lru_size() and compare the numbers in common code. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ------- mm/memcontrol.c | 11 ----------- mm/vmscan.c | 19 ++++++------------- 3 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 28bd5fa2ff2e..d6183f06d8c1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -116,7 +116,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); * For memory reclaim. */ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); -int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); @@ -321,12 +320,6 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return 1; } -static inline int -mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) -{ - return 1; -} - static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 60d28e36f0e0..af4b04f4d744 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1396,17 +1396,6 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return inactive * inactive_ratio < active; } -int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) -{ - unsigned long active; - unsigned long inactive; - - inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE); - active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE); - - return (active > inactive); -} - #define mem_cgroup_from_res_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) diff --git a/mm/vmscan.c b/mm/vmscan.c index b7d8015a6d54..396ecee281d0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1579,16 +1579,6 @@ static inline int inactive_anon_is_low(struct lruvec *lruvec) } #endif -static int inactive_file_is_low_global(struct zone *zone) -{ - unsigned long active, inactive; - - active = zone_page_state(zone, NR_ACTIVE_FILE); - inactive = zone_page_state(zone, NR_INACTIVE_FILE); - - return (active > inactive); -} - /** * inactive_file_is_low - check if file pages need to be deactivated * @lruvec: LRU vector to check @@ -1605,10 +1595,13 @@ static int inactive_file_is_low_global(struct zone *zone) */ static int inactive_file_is_low(struct lruvec *lruvec) { - if (!mem_cgroup_disabled()) - return mem_cgroup_inactive_file_is_low(lruvec); + unsigned long inactive; + unsigned long active; + + inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE); + active = get_lru_size(lruvec, LRU_ACTIVE_FILE); - return inactive_file_is_low_global(lruvec_zone(lruvec)); + return active > inactive; } static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) -- cgit v1.2.3 From 9127ab4ff92f0ecd7b4671efa9d0edb21c691e9f Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 22 Feb 2013 16:35:21 -0800 Subject: mm: add SECTION_IN_PAGE_FLAGS Instead of directly utilizing a combination of config options to determine this, add a macro to specifically address it. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Catalin Marinas Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fe039bdba4ed..97da0302cf51 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -625,6 +625,10 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +#define SECTION_IN_PAGE_FLAGS +#endif + /* * The identification function is only used by the buddy allocator for * determining if two pages could be buddies. We are not really @@ -708,7 +712,7 @@ static inline struct zone *page_zone(const struct page *page) return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; } -#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +#ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); @@ -738,7 +742,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, { set_page_zone(page, zone); set_page_node(page, node); -#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +#ifdef SECTION_IN_PAGE_FLAGS set_page_section(page, pfn_to_section_nr(pfn)); #endif } -- cgit v1.2.3 From 108bcc96ef7047c02cad4d229f04da38186a3f3f Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 22 Feb 2013 16:35:23 -0800 Subject: mm: add & use zone_end_pfn() and zone_spans_pfn() Add 2 helpers (zone_end_pfn() and zone_spans_pfn()) to reduce code duplication. This also switches to using them in compaction (where an additional variable needed to be renamed), page_alloc, vmstat, memory_hotplug, and kmemleak. Note that in compaction.c I avoid calling zone_end_pfn() repeatedly because I expect at some point the sycronization issues with start_pfn & spanned_pages will need fixing, either by actually using the seqlock or clever memory barrier usage. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Catalin Marinas Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++++ mm/compaction.c | 10 +++++----- mm/kmemleak.c | 5 ++--- mm/memory_hotplug.c | 10 +++++----- mm/page_alloc.c | 22 +++++++++------------- mm/vmstat.c | 2 +- 6 files changed, 32 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c80d0ac14dd..34343f51e211 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -527,6 +527,16 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +static inline unsigned zone_end_pfn(const struct zone *zone) +{ + return zone->zone_start_pfn + zone->spanned_pages; +} + +static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) +{ + return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); +} + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/mm/compaction.c b/mm/compaction.c index 25e75e3e2ac6..05ccb4cc0bdb 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -86,7 +86,7 @@ static inline bool isolation_suitable(struct compact_control *cc, static void __reset_isolation_suitable(struct zone *zone) { unsigned long start_pfn = zone->zone_start_pfn; - unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; zone->compact_cached_migrate_pfn = start_pfn; @@ -647,7 +647,7 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; + unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; @@ -666,7 +666,7 @@ static void isolate_freepages(struct zone *zone, */ high_pfn = min(low_pfn, pfn); - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + z_end_pfn = zone_end_pfn(zone); /* * Isolate free pages until enough are available to migrate the @@ -709,7 +709,7 @@ static void isolate_freepages(struct zone *zone, * only scans within a pageblock */ end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); - end_pfn = min(end_pfn, zone_end_pfn); + end_pfn = min(end_pfn, z_end_pfn); isolated = isolate_freepages_block(cc, pfn, end_pfn, freelist, false); nr_freepages += isolated; @@ -923,7 +923,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; unsigned long start_pfn = zone->zone_start_pfn; - unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages; + unsigned long end_pfn = zone_end_pfn(zone); ret = compaction_suitable(zone, cc->order); switch (ret) { diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 752a705c77c2..83dd5fbf5e60 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1300,9 +1300,8 @@ static void kmemleak_scan(void) */ lock_memory_hotplug(); for_each_online_node(i) { - pg_data_t *pgdat = NODE_DATA(i); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; + unsigned long start_pfn = node_start_pfn(i); + unsigned long end_pfn = node_end_pfn(i); unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn++) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index dda1ca695a08..8b3235eedf3d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -299,7 +299,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2, pgdat_resize_lock(z1->zone_pgdat, &flags); /* can't move pfns which are higher than @z2 */ - if (end_pfn > z2->zone_start_pfn + z2->spanned_pages) + if (end_pfn > zone_end_pfn(z2)) goto out_fail; /* the move out part mast at the left most of @z2 */ if (start_pfn > z2->zone_start_pfn) @@ -315,7 +315,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2, z1_start_pfn = start_pfn; resize_zone(z1, z1_start_pfn, end_pfn); - resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages); + resize_zone(z2, end_pfn, zone_end_pfn(z2)); pgdat_resize_unlock(z1->zone_pgdat, &flags); @@ -347,15 +347,15 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2, if (z1->zone_start_pfn > start_pfn) goto out_fail; /* the move out part mast at the right most of @z1 */ - if (z1->zone_start_pfn + z1->spanned_pages > end_pfn) + if (zone_end_pfn(z1) > end_pfn) goto out_fail; /* must included/overlap */ - if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages) + if (start_pfn >= zone_end_pfn(z1)) goto out_fail; /* use end_pfn for z2's end_pfn if z2 is empty */ if (z2->spanned_pages) - z2_end_pfn = z2->zone_start_pfn + z2->spanned_pages; + z2_end_pfn = zone_end_pfn(z2); else z2_end_pfn = end_pfn; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 64c83a8c3220..a3687afc5917 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -250,9 +250,7 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page) do { seq = zone_span_seqbegin(zone); - if (pfn >= zone->zone_start_pfn + zone->spanned_pages) - ret = 1; - else if (pfn < zone->zone_start_pfn) + if (!zone_spans_pfn(zone, pfn)) ret = 1; } while (zone_span_seqretry(zone, seq)); @@ -990,9 +988,9 @@ int move_freepages_block(struct zone *zone, struct page *page, end_pfn = start_pfn + pageblock_nr_pages - 1; /* Do not cross zone boundaries */ - if (start_pfn < zone->zone_start_pfn) + if (!zone_spans_pfn(zone, start_pfn)) start_page = page; - if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages) + if (!zone_spans_pfn(zone, end_pfn)) return 0; return move_freepages(zone, start_page, end_page, migratetype); @@ -1286,7 +1284,7 @@ void mark_free_pages(struct zone *zone) spin_lock_irqsave(&zone->lock, flags); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); @@ -3798,7 +3796,7 @@ static void setup_zone_migrate_reserve(struct zone *zone) * the block. */ start_pfn = zone->zone_start_pfn; - end_pfn = start_pfn + zone->spanned_pages; + end_pfn = zone_end_pfn(zone); start_pfn = roundup(start_pfn, pageblock_nr_pages); reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> pageblock_order; @@ -3912,7 +3910,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * pfn out of zone. */ if ((z->zone_start_pfn <= pfn) - && (pfn < z->zone_start_pfn + z->spanned_pages) + && (pfn < zone_end_pfn(z)) && !(pfn & (pageblock_nr_pages - 1))) set_pageblock_migratetype(page, MIGRATE_MOVABLE); @@ -4713,7 +4711,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) * for the buddy allocator to function correctly. */ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); - end = pgdat->node_start_pfn + pgdat->node_spanned_pages; + end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); @@ -5928,8 +5926,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); - VM_BUG_ON(pfn < zone->zone_start_pfn); - VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages); + VM_BUG_ON(!zone_spans_pfn(zone, pfn)); for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) if (flags & value) @@ -6027,8 +6024,7 @@ bool is_pageblock_removable_nolock(struct page *page) zone = page_zone(page); pfn = page_to_pfn(page); - if (zone->zone_start_pfn > pfn || - zone->zone_start_pfn + zone->spanned_pages <= pfn) + if (!zone_spans_pfn(zone, pfn)) return false; return !has_unmovable_pages(zone, page, 0, true); diff --git a/mm/vmstat.c b/mm/vmstat.c index 57f02fd1768b..e1d8ed172c42 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -891,7 +891,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, int mtype; unsigned long pfn; unsigned long start_pfn = zone->zone_start_pfn; - unsigned long end_pfn = start_pfn + zone->spanned_pages; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long count[MIGRATE_TYPES] = { 0, }; for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { -- cgit v1.2.3 From 2a6e3ebee2edcade56f836390a5f0c7b76ff5f9e Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 22 Feb 2013 16:35:24 -0800 Subject: mm: add zone_is_empty() and zone_is_initialized() Factoring out these 2 checks makes it more clear what we are actually checking for. Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Catalin Marinas Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 34343f51e211..cf8925962b68 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -537,6 +537,16 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } +static inline bool zone_is_initialized(struct zone *zone) +{ + return !!zone->wait_table; +} + +static inline bool zone_is_empty(struct zone *zone) +{ + return zone->spanned_pages == 0; +} + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the -- cgit v1.2.3 From da3649e133948d8b7d8c57b05a33faf62ac2cc7e Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 22 Feb 2013 16:35:27 -0800 Subject: mmzone: add pgdat_{end_pfn,is_empty}() helpers & consolidate. Add pgdat_end_pfn() and pgdat_is_empty() helpers which match the similar zone_*() functions. Change node_end_pfn() to be a wrapper of pgdat_end_pfn(). Signed-off-by: Cody P Schafer Cc: David Hansen Cc: Catalin Marinas Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cf8925962b68..ede274957e05 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -756,11 +756,17 @@ typedef struct pglist_data { #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) -#define node_end_pfn(nid) ({\ - pg_data_t *__pgdat = NODE_DATA(nid);\ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ -}) +static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) +{ + return pgdat->node_start_pfn + pgdat->node_spanned_pages; +} + +static inline bool pgdat_is_empty(pg_data_t *pgdat) +{ + return !pgdat->node_start_pfn && !pgdat->node_spanned_pages; +} #include -- cgit v1.2.3 From ebec3862fd6eefe8301aa55ed2e30c685d831842 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Fri, 22 Feb 2013 16:35:43 -0800 Subject: mm: fix return type for functions nr_free_*_pages Currently, the amount of RAM that functions nr_free_*_pages return is held in unsigned int. But in machines with big memory (exceeding 16TB), the amount may be incorrect because of overflow, so fix it. Signed-off-by: Zhang Yanfei Cc: Simon Horman Cc: Julian Anastasov Cc: David Miller Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: Mel Gorman Cc: Minchan Kim Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++-- mm/page_alloc.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a3e22d357e91..8a15f38ebc5c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -225,8 +225,8 @@ struct swap_list_t { extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; extern unsigned long dirty_balance_reserve; -extern unsigned int nr_free_buffer_pages(void); -extern unsigned int nr_free_pagecache_pages(void); +extern unsigned long nr_free_buffer_pages(void); +extern unsigned long nr_free_pagecache_pages(void); /* Definition of global_page_state not available yet */ #define nr_free_pages() global_page_state(NR_FREE_PAGES) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 159f81577774..276140654305 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2813,13 +2813,13 @@ void free_pages_exact(void *virt, size_t size) } EXPORT_SYMBOL(free_pages_exact); -static unsigned int nr_free_zone_pages(int offset) +static unsigned long nr_free_zone_pages(int offset) { struct zoneref *z; struct zone *zone; /* Just pick one node, since fallback list is circular */ - unsigned int sum = 0; + unsigned long sum = 0; struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); @@ -2836,7 +2836,7 @@ static unsigned int nr_free_zone_pages(int offset) /* * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL */ -unsigned int nr_free_buffer_pages(void) +unsigned long nr_free_buffer_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_USER)); } @@ -2845,7 +2845,7 @@ EXPORT_SYMBOL_GPL(nr_free_buffer_pages); /* * Amount of free RAM allocatable within all zones */ -unsigned int nr_free_pagecache_pages(void) +unsigned long nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE)); } -- cgit v1.2.3 From b21e0b90ccb99a377bce0167fed1e881bb5065d7 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Fri, 22 Feb 2013 16:35:48 -0800 Subject: vmscan: change type of vm_total_pages to unsigned long This variable is calculated from nr_free_pagecache_pages so change its type to unsigned long. Signed-off-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- mm/vmscan.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 8a15f38ebc5c..2818a123f3ea 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -275,7 +275,7 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); -extern long vm_total_pages; +extern unsigned long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; diff --git a/mm/vmscan.c b/mm/vmscan.c index 606d0bb46091..88c5fed8b9a4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -128,7 +128,7 @@ struct scan_control { * From 0 .. 100. Higher means more swappy. */ int vm_swappiness = 60; -long vm_total_pages; /* The total number of pages which the VM controls */ +unsigned long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -- cgit v1.2.3 From 28a35716d317980ae9bc2ff2f84c33a3cda9e884 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:35:55 -0800 Subject: mm: use long type for page counts in mm_populate() and get_user_pages() Use long type for page counts in mm_populate() so as to avoid integer overflow when running the following test code: int main(void) { void *p = mmap(NULL, 0x100000000000, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); printf("p: %p\n", p); mlockall(MCL_CURRENT); printf("done\n"); return 0; } Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 +++--- include/linux/mm.h | 15 ++++++++------- mm/hugetlb.c | 12 ++++++------ mm/memory.c | 18 +++++++++--------- mm/mlock.c | 4 ++-- mm/nommu.c | 15 ++++++++------- 6 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0c80d3f57a5b..eedc334fb6f5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -43,9 +43,9 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, #endif int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, - struct page **, struct vm_area_struct **, - unsigned long *, int *, int, unsigned int flags); +long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, + struct page **, struct vm_area_struct **, + unsigned long *, unsigned long *, long, unsigned int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range_final(struct mmu_gather *tlb, diff --git a/include/linux/mm.h b/include/linux/mm.h index 97da0302cf51..87b0ef253607 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1013,13 +1013,14 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write); -int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, unsigned int foll_flags, - struct page **pages, struct vm_area_struct **vmas, - int *nonblocking); -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, int write, int force, - struct page **pages, struct vm_area_struct **vmas); +long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + unsigned int foll_flags, struct page **pages, + struct vm_area_struct **vmas, int *nonblocking); +long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct kvec; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e14a8c79a1eb..cdb64e4d238a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2920,14 +2920,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return NULL; } -int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i, - unsigned int flags) +long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *position, unsigned long *nr_pages, + long i, unsigned int flags) { unsigned long pfn_offset; unsigned long vaddr = *position; - int remainder = *length; + unsigned long remainder = *nr_pages; struct hstate *h = hstate_vma(vma); spin_lock(&mm->page_table_lock); @@ -2997,7 +2997,7 @@ same_page: } } spin_unlock(&mm->page_table_lock); - *length = remainder; + *nr_pages = remainder; *position = vaddr; return i ? i : -EFAULT; diff --git a/mm/memory.c b/mm/memory.c index 7bd22a621817..bc929dbad215 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1677,15 +1677,15 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ -int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, unsigned int gup_flags, - struct page **pages, struct vm_area_struct **vmas, - int *nonblocking) +long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas, int *nonblocking) { - int i; + long i; unsigned long vm_flags; - if (nr_pages <= 0) + if (!nr_pages) return 0; VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); @@ -1981,9 +1981,9 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, * * See also get_user_pages_fast, for performance critical applications. */ -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, int write, int force, - struct page **pages, struct vm_area_struct **vmas) +long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, int write, + int force, struct page **pages, struct vm_area_struct **vmas) { int flags = FOLL_TOUCH; diff --git a/mm/mlock.c b/mm/mlock.c index 38db3b094105..e6638f565d42 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -160,7 +160,7 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; - int nr_pages = (end - start) / PAGE_SIZE; + unsigned long nr_pages = (end - start) / PAGE_SIZE; int gup_flags; VM_BUG_ON(start & ~PAGE_MASK); @@ -382,7 +382,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; - int ret = 0; + long ret = 0; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(len != PAGE_ALIGN(len)); diff --git a/mm/nommu.c b/mm/nommu.c index 87854a55829d..6ab706608492 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -140,10 +140,10 @@ unsigned int kobjsize(const void *objp) return PAGE_SIZE << compound_order(page); } -int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, unsigned int foll_flags, - struct page **pages, struct vm_area_struct **vmas, - int *retry) +long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + unsigned int foll_flags, struct page **pages, + struct vm_area_struct **vmas, int *nonblocking) { struct vm_area_struct *vma; unsigned long vm_flags; @@ -190,9 +190,10 @@ finish_or_fault: * slab page or a secondary page from a compound page * - don't permit access to VMAs that don't support it, such as I/O mappings */ -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, int write, int force, - struct page **pages, struct vm_area_struct **vmas) +long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + struct vm_area_struct **vmas) { int flags = 0; -- cgit v1.2.3 From 240aadeedc4a89fc44623f8ce4ca46bda73db07e Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 22 Feb 2013 16:35:56 -0800 Subject: mm: accelerate mm_populate() treatment of THP pages This change adds a follow_page_mask function which is equivalent to follow_page, but with an extra page_mask argument. follow_page_mask sets *page_mask to HPAGE_PMD_NR - 1 when it encounters a THP page, and to 0 in other cases. __get_user_pages() makes use of this in order to accelerate populating THP ranges - that is, when both the pages and vmas arrays are NULL, we don't need to iterate HPAGE_PMD_NR times to cover a single THP page (and we also avoid taking mm->page_table_lock that many times). Signed-off-by: Michel Lespinasse Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 13 +++++++++++-- mm/memory.c | 31 +++++++++++++++++++++++-------- mm/nommu.c | 6 ++++-- 3 files changed, 38 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 87b0ef253607..6124f1db50fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1629,8 +1629,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); -struct page *follow_page(struct vm_area_struct *, unsigned long address, - unsigned int foll_flags); +struct page *follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int foll_flags, + unsigned int *page_mask); + +static inline struct page *follow_page(struct vm_area_struct *vma, + unsigned long address, unsigned int foll_flags) +{ + unsigned int unused_page_mask; + return follow_page_mask(vma, address, foll_flags, &unused_page_mask); +} + #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ diff --git a/mm/memory.c b/mm/memory.c index bc929dbad215..5d2ef1217d0c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1462,10 +1462,11 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, EXPORT_SYMBOL_GPL(zap_vma_ptes); /** - * follow_page - look up a page descriptor from a user-virtual address + * follow_page_mask - look up a page descriptor from a user-virtual address * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour + * @page_mask: on output, *page_mask is set according to the size of the page * * @flags can have FOLL_ flags set, defined in * @@ -1473,8 +1474,9 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int flags) +struct page *follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, + unsigned int *page_mask) { pgd_t *pgd; pud_t *pud; @@ -1484,6 +1486,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, struct page *page; struct mm_struct *mm = vma->vm_mm; + *page_mask = 0; + page = follow_huge_addr(mm, address, flags & FOLL_WRITE); if (!IS_ERR(page)) { BUG_ON(flags & FOLL_GET); @@ -1530,6 +1534,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(&mm->page_table_lock); + *page_mask = HPAGE_PMD_NR - 1; goto out; } } else @@ -1684,6 +1689,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, { long i; unsigned long vm_flags; + unsigned int page_mask; if (!nr_pages) return 0; @@ -1761,6 +1767,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, get_page(page); } pte_unmap(pte); + page_mask = 0; goto next_page; } @@ -1778,6 +1785,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, do { struct page *page; unsigned int foll_flags = gup_flags; + unsigned int page_increm; /* * If we have a pending SIGKILL, don't keep faulting @@ -1787,7 +1795,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, return i ? i : -ERESTARTSYS; cond_resched(); - while (!(page = follow_page(vma, start, foll_flags))) { + while (!(page = follow_page_mask(vma, start, + foll_flags, &page_mask))) { int ret; unsigned int fault_flags = 0; @@ -1861,13 +1870,19 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, flush_anon_page(vma, page, start); flush_dcache_page(page); + page_mask = 0; } next_page: - if (vmas) + if (vmas) { vmas[i] = vma; - i++; - start += PAGE_SIZE; - nr_pages--; + page_mask = 0; + } + page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + if (page_increm > nr_pages) + page_increm = nr_pages; + i += page_increm; + start += page_increm * PAGE_SIZE; + nr_pages -= page_increm; } while (nr_pages && start < vma->vm_end); } while (nr_pages); return i; diff --git a/mm/nommu.c b/mm/nommu.c index 6ab706608492..da0d210fd403 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1819,9 +1819,11 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, return ret; } -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int foll_flags) +struct page *follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, + unsigned int *page_mask) { + *page_mask = 0; return NULL; } -- cgit v1.2.3 From 5117b3b835f288314a2d4e5512bc1747e3a7c8ed Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 22 Feb 2013 16:36:07 -0800 Subject: mm,ksm: FOLL_MIGRATION do migration_entry_wait In "ksm: remove old stable nodes more thoroughly" I said that I'd never seen its WARN_ON_ONCE(page_mapped(page)). True at the time of writing, but it soon appeared once I tried fuller tests on the whole series. It turned out to be due to the KSM page migration itself: unmerge_and_ remove_all_rmap_items() failed to locate and replace all the KSM pages, because of that hiatus in page migration when old pte has been replaced by migration entry, but not yet by new pte. follow_page() finds no page at that instant, but a KSM page reappears shortly after, without a fault. Add FOLL_MIGRATION flag, so follow_page() can do migration_entry_wait() for KSM's break_cow(). I'd have preferred to avoid another flag, and do it every time, in case someone else makes the same easy mistake; but did not find another transgressor (the common get_user_pages() is of course safe), and cannot be sure that every follow_page() caller is prepared to sleep - ia64's xencomm_vtop()? Now, THP's wait_split_huge_page() can already sleep there, since anon_vma locking was changed to mutex, but maybe that's somehow excluded. Signed-off-by: Hugh Dickins Cc: Mel Gorman Cc: Petr Holasek Cc: Andrea Arcangeli Cc: Izik Eidus Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/ksm.c | 2 +- mm/memory.c | 20 ++++++++++++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6124f1db50fe..e7c3f9a0111a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1651,6 +1651,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ +#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/ksm.c b/mm/ksm.c index 0320327b8a6c..d61cba6fa1dc 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -364,7 +364,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); - page = follow_page(vma, addr, FOLL_GET); + page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION); if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) diff --git a/mm/memory.c b/mm/memory.c index 5d2ef1217d0c..ec8ba011fa7d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1548,8 +1548,24 @@ split_fallthrough: ptep = pte_offset_map_lock(mm, pmd, address, &ptl); pte = *ptep; - if (!pte_present(pte)) - goto no_page; + if (!pte_present(pte)) { + swp_entry_t entry; + /* + * KSM's break_ksm() relies upon recognizing a ksm page + * even while it is being migrated, so for that case we + * need migration_entry_wait(). + */ + if (likely(!(flags & FOLL_MIGRATION))) + goto no_page; + if (pte_none(pte) || pte_file(pte)) + goto no_page; + entry = pte_to_swp_entry(pte); + if (!is_migration_entry(entry)) + goto no_page; + pte_unmap_unlock(ptep, ptl); + migration_entry_wait(mm, pmd, address); + goto split_fallthrough; + } if ((flags & FOLL_NUMA) && pte_numa(pte)) goto no_page; if ((flags & FOLL_WRITE) && !pte_write(pte)) -- cgit v1.2.3 From da8c87241c26aac81a64c7e4d21d438a33018f4e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 21 Feb 2013 23:32:27 +0000 Subject: vlan: adjust vlan_set_encap_proto() for its callers There are two places to call vlan_set_encap_proto(): vlan_untag() and __pop_vlan_tci(). vlan_untag() assumes skb->data points after mac addr, otherwise the following code vhdr = (struct vlan_hdr *) skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); __vlan_hwaccel_put_tag(skb, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); won't be correct. But __pop_vlan_tci() assumes points _before_ mac addr. In vlan_set_encap_proto(), it looks for some magic L2 value after mac addr: rawp = skb->data; if (*(unsigned short *) rawp == 0xFFFF) ... Therefore __pop_vlan_tci() is obviously wrong. A quick fix is avoiding using skb->data in vlan_set_encap_proto(), use 'vhdr+1' is always correct in both cases. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Acked-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d06cc5c8f58c..218a3b686d90 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -331,7 +331,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { __be16 proto; - unsigned char *rawp; + unsigned short *rawp; /* * Was a VLAN packet, grab the encapsulated protocol, which the layer @@ -344,8 +344,8 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, return; } - rawp = skb->data; - if (*(unsigned short *) rawp == 0xFFFF) + rawp = (unsigned short *)(vhdr + 1); + if (*rawp == 0xFFFF) /* * This is a magic hack to spot IPX packets. Older Novell * breaks the protocol design and runs IPX over 802.3 without -- cgit v1.2.3 From 561c6731978fa128f29342495f47fc3365898b3d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 10:52:26 -0500 Subject: switch lseek to COMPAT_SYSCALL_DEFINE Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 2 +- arch/arm64/kernel/sys32.S | 5 ----- arch/parisc/kernel/sys_parisc32.c | 10 ---------- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/kernel/sys_ppc32.c | 6 ------ arch/s390/kernel/compat_wrapper.S | 6 ------ arch/s390/kernel/syscalls.S | 2 +- arch/sparc/kernel/sys32.S | 1 - arch/sparc/kernel/systbls_64.S | 2 +- arch/x86/ia32/sys_ia32.c | 5 ----- arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- fs/read_write.c | 9 ++++++++- include/linux/compat.h | 1 + 15 files changed, 15 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index e60e386178d1..8153f1a43f0a 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -40,7 +40,7 @@ __SYSCALL(15, sys_chmod) __SYSCALL(16, sys_lchown16) __SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */ __SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */ -__SYSCALL(19, compat_sys_lseek_wrapper) +__SYSCALL(19, compat_sys_lseek) __SYSCALL(20, sys_getpid) __SYSCALL(21, compat_sys_mount) __SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */ diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index 6abb05721614..9416d045a687 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -58,11 +58,6 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * in registers or that take 32-bit parameters which require sign * extension. */ -compat_sys_lseek_wrapper: - sxtw x1, w1 - b sys_lseek -ENDPROC(compat_sys_lseek_wrapper) - compat_sys_pread64_wrapper: orr x3, x4, x5, lsl #32 b sys_pread64 diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index eca69bb8ef5f..051c8b90231f 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -79,16 +79,6 @@ asmlinkage long sys32_sendfile64(u32 out_fd, u32 in_fd, (loff_t __user *)offset, count); } - -/* lseek() needs a wrapper because 'offset' can be negative, but the top - * half of the argument has been zeroed by syscall.S. - */ - -asmlinkage int sys32_lseek(unsigned int fd, int offset, unsigned int origin) -{ - return sys_lseek(fd, offset, origin); -} - asmlinkage long sys32_semctl(int semid, int semnum, int cmd, union semun arg) { union semun u; diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index fc9cab1cc2df..d0efc0aeb612 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -76,7 +76,7 @@ ENTRY_SAME(socket) /* struct stat is MAYBE identical wide and narrow ?? */ ENTRY_COMP(newstat) - ENTRY_DIFF(lseek) + ENTRY_COMP(lseek) ENTRY_SAME(getpid) /* 20 */ /* the 'void * data' parameter may need re-packing in wide */ ENTRY_COMP(mount) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index d906f33441c6..535b6d8a41cc 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -22,7 +22,7 @@ SYSCALL_SPU(chmod) SYSCALL_SPU(lchown) SYSCALL(ni_syscall) OLDSYS(stat) -SYSX_SPU(sys_lseek,ppc32_lseek,sys_lseek) +COMPAT_SYS_SPU(lseek) SYSCALL_SPU(getpid) COMPAT_SYS(mount) SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index dbc44ba5b078..5677a36f450b 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -146,12 +146,6 @@ asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, (off_t __user *)offset, count); } -off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) -{ - /* sign extend n */ - return sys_lseek(fd, (int)offset, origin); -} - long compat_sys_truncate(const char __user * path, u32 length) { /* sign extend length */ diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index c14faf39ae36..2b1a3a03244f 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -67,12 +67,6 @@ ENTRY(sys32_lchown16_wrapper) llgfr %r4,%r4 # __kernel_old_uid_emu31_t jg sys32_lchown16 # branch to system call -ENTRY(sys32_lseek_wrapper) - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # off_t - llgfr %r4,%r4 # unsigned int - jg sys_lseek # branch to system call - #sys32_getpid_wrapper # void ENTRY(sys32_mount_wrapper) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index aaac708aa110..0e5262f01007 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -27,7 +27,7 @@ SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ -SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper) +SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 0cfe219646e3..be3d65a3c270 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -47,7 +47,6 @@ SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) SIGN1(sys32_select, compat_sys_select, %o0) SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) -SIGN1(sys32_lseek, sys_lseek, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 6eed1945a2cd..9ed517c5037b 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -21,7 +21,7 @@ sys_call_table32: /*0*/ .word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write /*5*/ .word compat_sys_open, sys_close, compat_sys_wait4, sys_creat, sys_link /*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod -/*15*/ .word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, sys32_lseek +/*15*/ .word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, compat_sys_lseek /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16 /*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, compat_sys_sigaltstack, sys_pause /*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 592f5a9a9c0e..ad7a20cbc699 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -218,11 +218,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. */ -long sys32_lseek(unsigned int fd, int offset, unsigned int whence) -{ - return sys_lseek(fd, offset, whence); -} - long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 0218d917f509..8459efc39686 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -43,7 +43,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -long sys32_lseek(unsigned int, int, unsigned int); long sys32_kill(int, int); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); long sys32_vm86_warning(void); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index f2fe78ff22cc..f51810be1a32 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -25,7 +25,7 @@ 16 i386 lchown sys_lchown16 17 i386 break 18 i386 oldstat sys_stat -19 i386 lseek sys_lseek sys32_lseek +19 i386 lseek sys_lseek compat_sys_lseek 20 i386 getpid sys_getpid 21 i386 mount sys_mount compat_sys_mount 22 i386 umount sys_oldumount diff --git a/fs/read_write.c b/fs/read_write.c index bb34af315280..e57796cb7b59 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "read_write.h" #include @@ -247,6 +248,13 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) return retval; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) +{ + return sys_lseek(fd, offset, whence); +} +#endif + #ifdef __ARCH_WANT_SYS_LLSEEK SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned long, offset_low, loff_t __user *, result, @@ -278,7 +286,6 @@ out_putf: } #endif - /* * rw_verify_area doesn't like huge counts. We limit * them to something that fits in "int" so that others diff --git a/include/linux/compat.h b/include/linux/compat.h index de095b0462a7..59c72048bf20 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -359,6 +359,7 @@ asmlinkage ssize_t compat_sys_preadv(unsigned long fd, asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, u32 pos_low, u32 pos_high); +asmlinkage long comat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp); -- cgit v1.2.3 From c148e9ff4bd45f26d3f0253c20efc497672c3c84 Mon Sep 17 00:00:00 2001 From: "Zhang, YiX X" Date: Tue, 8 Jan 2013 06:07:39 +0000 Subject: mmc: correct the EXCEPTION_EVENTS_STATUS value in comment The right value is 54 according to eMMC 4.5 specification. Signed-off-by: ZhangYi Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index be2500a49925..7069dcea27c5 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -83,7 +83,7 @@ struct mmc_ext_csd { unsigned int data_tag_unit_size; /* DATA TAG UNIT size */ unsigned int boot_ro_lock; /* ro lock support */ bool boot_ro_lockable; - u8 raw_exception_status; /* 53 */ + u8 raw_exception_status; /* 54 */ u8 raw_partition_support; /* 160 */ u8 raw_rpmb_size_mult; /* 168 */ u8 raw_erased_mem_count; /* 181 */ -- cgit v1.2.3 From a70aaa64da2205d32d1c9362d8f5d4be619cd58f Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 11 Jan 2013 17:03:50 +0000 Subject: mmc: dw_mmc: Add "disable-wp" device tree property The "disable-wp" property is used to specify that a given SD card slot doesn't have a concept of write protect. This eliminates the need for special case code for SD slots that should never be write protected (like a micro SD slot or a dev board). The dw_mmc driver is special in needing to specify "disable-wp" because the lack of a "wp-gpios" property means to use the special purpose write protect line. On some other mmc devices the lack of "wp-gpios" means that write protect should be disabled. Signed-off-by: Doug Anderson Acked-by: Seungwon Jeon Acked-by: Will Newton Acked-by: Olof Johansson Signed-off-by: Chris Ball --- .../devicetree/bindings/mmc/synopsis-dw-mshc.txt | 12 +++++-- drivers/mmc/host/dw_mmc.c | 41 +++++++++++++++++++++- include/linux/mmc/dw_mmc.h | 9 +++++ 3 files changed, 59 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt index 06cd32d08052..726fd2122a13 100644 --- a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt @@ -26,8 +26,16 @@ Required Properties: * bus-width: as documented in mmc core bindings. * wp-gpios: specifies the write protect gpio line. The format of the - gpio specifier depends on the gpio controller. If the write-protect - line is not available, this property is optional. + gpio specifier depends on the gpio controller. If a GPIO is not used + for write-protect, this property is optional. + + * disable-wp: If the wp-gpios property isn't present then (by default) + we'd assume that the write protect is hooked up directly to the + controller's special purpose write protect line (accessible via + the WRTPRT register). However, it's possible that we simply don't + want write protect. In that case specify 'disable-wp'. + NOTE: This property is not required for slots known to always + connect to eMMC or SDIO cards. Optional properties: diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 323c5022c2ca..90f7d990551b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -74,6 +74,7 @@ struct idmac_desc { * struct dw_mci_slot - MMC slot state * @mmc: The mmc_host representing this slot. * @host: The MMC controller this slot is using. + * @quirks: Slot-level quirks (DW_MCI_SLOT_QUIRK_XXX) * @ctype: Card type for this slot. * @mrq: mmc_request currently being processed or waiting to be * processed, or NULL when the slot is idle. @@ -88,6 +89,8 @@ struct dw_mci_slot { struct mmc_host *mmc; struct dw_mci *host; + int quirks; + u32 ctype; struct mmc_request *mrq; @@ -825,7 +828,13 @@ static int dw_mci_get_ro(struct mmc_host *mmc) struct dw_mci_board *brd = slot->host->pdata; /* Use platform get_ro function, else try on board write protect */ - if (brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT) + + /* + * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future + * patch in the series once reference to it is removed. + */ + if ((brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT) || + (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)) read_only = 0; else if (brd->get_ro) read_only = brd->get_ro(slot->id); @@ -1785,6 +1794,30 @@ static struct device_node *dw_mci_of_find_slot_node(struct device *dev, u8 slot) return NULL; } +static struct dw_mci_of_slot_quirks { + char *quirk; + int id; +} of_slot_quirks[] = { + { + .quirk = "disable-wp", + .id = DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT, + }, +}; + +static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot) +{ + struct device_node *np = dw_mci_of_find_slot_node(dev, slot); + int quirks = 0; + int idx; + + /* get quirks */ + for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++) + if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) + quirks |= of_slot_quirks[idx].id; + + return quirks; +} + /* find out bus-width for a given slot */ static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot) { @@ -1800,6 +1833,10 @@ static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot) return bus_wd; } #else /* CONFIG_OF */ +static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot) +{ + return 0; +} static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot) { return 1; @@ -1828,6 +1865,8 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) slot->host = host; host->slot[id] = slot; + slot->quirks = dw_mci_of_get_slot_quirks(host->dev, slot->id); + mmc->ops = &dw_mci_ops; mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510); mmc->f_max = host->bus_hz; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 34be4f47293c..de61de5608c9 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -209,9 +209,18 @@ struct dw_mci_dma_ops { #define DW_MCI_QUIRK_HIGHSPEED BIT(2) /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) + /* Write Protect detection not available */ +/* + * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future + * patch in the series once reference to it is removed. + */ #define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4) +/* Slot level quirks */ +/* This slot has no write protect */ +#define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT BIT(0) + struct dma_pdata; struct block_settings { -- cgit v1.2.3 From 9640639b09313af4cd37a465408643aba927808e Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 11 Jan 2013 17:03:54 +0000 Subject: mmc: dw_mmc: Remove DW_MCI_QUIRK_NO_WRITE_PROTECT The original quirk was added in the change 'mmc: dw_mmc: add quirk to indicate missing write protect line'. The original quirk was added at a controller level even though each slot has its own write protect (so the quirk should be at the slot level). A recent change (mmc: dw_mmc: Add "disable-wp" device tree property) added a slot-level quirk and support for the quirk directly to dw_mmc. Signed-off-by: Doug Anderson Acked-by: Will Newton Acked-by: Olof Johansson Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc.c | 8 +------- include/linux/mmc/dw_mmc.h | 7 ------- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index df6207909fe7..60063ccb4c4b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -831,13 +831,7 @@ static int dw_mci_get_ro(struct mmc_host *mmc) struct dw_mci_board *brd = slot->host->pdata; /* Use platform get_ro function, else try on board write protect */ - - /* - * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future - * patch in the series once reference to it is removed. - */ - if ((brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT) || - (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)) + if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) read_only = 0; else if (brd->get_ro) read_only = brd->get_ro(slot->id); diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index de61de5608c9..198f0fa44e9f 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -210,13 +210,6 @@ struct dw_mci_dma_ops { /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) -/* Write Protect detection not available */ -/* - * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future - * patch in the series once reference to it is removed. - */ -#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4) - /* Slot level quirks */ /* This slot has no write protect */ #define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT BIT(0) -- cgit v1.2.3 From af51079e68d4759e458b0592df5d1fab373c43ae Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 21 Jan 2013 19:02:28 +0800 Subject: mmc: sdhci-esdhc-imx: support 8bit mode The i.MX esdhc has a nonstandard bit layout for the SDHCI_HOST_CONTROL register. To support 8bit bus width on i.MX populate the platform_bus_width callback. This is tested on an i.MX25, but should according to the datasheets work on the other i.MX using this hardware aswell. The i.MX6, while having a SDHCI_SPEC_300 controller, still uses the same nonstandard register layout. Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo Tested-by: Dirk Behme Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 56 +++++++++++++++++++++++++++-- include/linux/platform_data/mmc-esdhc-imx.h | 1 + 2 files changed, 55 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 24daaf4e20ba..f7ee5e67516c 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -40,6 +40,13 @@ /* Bits 3 and 6 are not SDHCI standard definitions */ #define ESDHC_MIX_CTRL_SDHCI_MASK 0xb7 +/* + * Our interpretation of the SDHCI_HOST_CONTROL register + */ +#define ESDHC_CTRL_4BITBUS (0x1 << 1) +#define ESDHC_CTRL_8BITBUS (0x2 << 1) +#define ESDHC_CTRL_BUSWIDTH_MASK (0x3 << 1) + /* * There is an INT DMA ERR mis-match between eSDHC and STD SDHC SPEC: * Bit25 is used in STD SPEC, and is reserved in fsl eSDHC design, @@ -294,6 +301,7 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = pltfm_host->priv; u32 new_val; + u32 mask; switch (reg) { case SDHCI_POWER_CONTROL: @@ -304,7 +312,7 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) return; case SDHCI_HOST_CONTROL: /* FSL messed up here, so we need to manually compose it. */ - new_val = val & (SDHCI_CTRL_LED | SDHCI_CTRL_4BITBUS); + new_val = val & SDHCI_CTRL_LED; /* ensure the endianness */ new_val |= ESDHC_HOST_CONTROL_LE; /* bits 8&9 are reserved on mx25 */ @@ -313,7 +321,13 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) new_val |= (val & SDHCI_CTRL_DMA_MASK) << 5; } - esdhc_clrset_le(host, 0xffff, new_val, reg); + /* + * Do not touch buswidth bits here. This is done in + * esdhc_pltfm_bus_width. + */ + mask = 0xffff & ~ESDHC_CTRL_BUSWIDTH_MASK; + + esdhc_clrset_le(host, mask, new_val, reg); return; } esdhc_clrset_le(host, 0xff, val, reg); @@ -370,6 +384,28 @@ static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) return -ENOSYS; } +static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width) +{ + u32 ctrl; + + switch (width) { + case MMC_BUS_WIDTH_8: + ctrl = ESDHC_CTRL_8BITBUS; + break; + case MMC_BUS_WIDTH_4: + ctrl = ESDHC_CTRL_4BITBUS; + break; + default: + ctrl = 0; + break; + } + + esdhc_clrset_le(host, ESDHC_CTRL_BUSWIDTH_MASK, ctrl, + SDHCI_HOST_CONTROL); + + return 0; +} + static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, @@ -380,6 +416,7 @@ static struct sdhci_ops sdhci_esdhc_ops = { .get_max_clock = esdhc_pltfm_get_max_clock, .get_min_clock = esdhc_pltfm_get_min_clock, .get_ro = esdhc_pltfm_get_ro, + .platform_bus_width = esdhc_pltfm_bus_width, }; static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { @@ -417,6 +454,8 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, if (gpio_is_valid(boarddata->wp_gpio)) boarddata->wp_type = ESDHC_WP_GPIO; + of_property_read_u32(np, "bus-width", &boarddata->max_bus_width); + return 0; } #else @@ -548,6 +587,19 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) break; } + switch (boarddata->max_bus_width) { + case 8: + host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA; + break; + case 4: + host->mmc->caps |= MMC_CAP_4_BIT_DATA; + break; + case 1: + default: + host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; + break; + } + err = sdhci_add_host(host); if (err) goto disable_clk; diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index aaf97481f413..b4a0521ce411 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -39,5 +39,6 @@ struct esdhc_platform_data { unsigned int cd_gpio; enum wp_types wp_type; enum cd_types cd_type; + int max_bus_width; }; #endif /* __ASM_ARCH_IMX_ESDHC_H */ -- cgit v1.2.3 From d887874e0ead6a0b86b6046b872730c81c121352 Mon Sep 17 00:00:00 2001 From: Johan Rudholm Date: Mon, 28 Jan 2013 15:08:26 +0100 Subject: mmc: core: Add card_busy to host_ops This host_ops member is used to test if the card is signaling busy by pulling dat[0:3] low. Signed-off-by: Johan Rudholm Acked-by: Ulf Hansson Tested-by: Wei WANG Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 523d570f58ad..0373b0a6daac 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -131,6 +131,9 @@ struct mmc_host_ops { int (*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios); + /* Check if the card is pulling dat[0:3] low */ + int (*card_busy)(struct mmc_host *host); + /* The tuning command opcode value is different for SD and eMMC cards */ int (*execute_tuning)(struct mmc_host *host, u32 opcode); void (*enable_preset_value)(struct mmc_host *host, bool enable); -- cgit v1.2.3 From 52983382c74f59a3953e622d7661a24e1bc4388a Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 31 Jan 2013 11:31:37 +0800 Subject: mmc: sdhci: enhance preset value function 4d55c5a1 ("mmc: sdhci: enable preset value after uhs initialization") added preset value support and enabled it by default during sd card init. Below are the enhancements introduced by this patch: 1. In current code, preset value is enabled after setting clock finished, which means the clock is manually set by driver firstly and then suddenly switched to preset value at this point. So the first setting is useless and unnecessary. What's more, the first clock setting may differ from the preset one. The better way is enable preset value just after switch to UHS mode so the preset value can take effect immediately. So move preset value enable from mmc_sd_init_card to sdhci_set_ios which will be called during set timing. 2. In current code, preset value is disabled at the beginning of mmc_attach_sd. It's too late since low freq (400khz) should be set in mmc_power_up. So move preset value disable to sdhci_set_ios which will be called during power up. 3. host->clock and ios->drv_type should also be updated according to the preset value if it's enabled. Current code missed this. 4. This patch also introduce a quirk to disable preset value in case preset value doesn't work. This patch has been verified on sdhci-pxav3 platform with both preset enabled and disabled. Signed-off-by: Kevin Liu Reviewed-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/sd.c | 17 ------ drivers/mmc/host/sdhci.c | 129 ++++++++++++++++++++++++++++++++-------------- drivers/mmc/host/sdhci.h | 12 +++++ include/linux/mmc/host.h | 1 - include/linux/mmc/sdhci.h | 1 + 5 files changed, 102 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 03134b1e563c..9e645e19cec6 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -969,16 +969,6 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, /* Card is an ultra-high-speed card */ mmc_card_set_uhs(card); - - /* - * Since initialization is now complete, enable preset - * value registers for UHS-I cards. - */ - if (host->ops->enable_preset_value) { - mmc_host_clk_hold(card->host); - host->ops->enable_preset_value(host, true); - mmc_host_clk_release(card->host); - } } else { /* * Attempt to change to high-speed (if supported) @@ -1157,13 +1147,6 @@ int mmc_attach_sd(struct mmc_host *host) BUG_ON(!host); WARN_ON(!host->claimed); - /* Disable preset value enable if already set since last time */ - if (host->ops->enable_preset_value) { - mmc_host_clk_hold(host); - host->ops->enable_preset_value(host, false); - mmc_host_clk_release(host); - } - err = mmc_send_app_op_cond(host, 0, &ocr); if (err) return err; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index efb112684787..ba586ae99252 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -53,6 +53,7 @@ static void sdhci_send_command(struct sdhci_host *, struct mmc_command *); static void sdhci_finish_command(struct sdhci_host *); static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode); static void sdhci_tuning_timer(unsigned long data); +static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable); #ifdef CONFIG_PM_RUNTIME static int sdhci_runtime_pm_get(struct sdhci_host *host); @@ -1082,6 +1083,37 @@ static void sdhci_finish_command(struct sdhci_host *host) } } +static u16 sdhci_get_preset_value(struct sdhci_host *host) +{ + u16 ctrl, preset = 0; + + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + switch (ctrl & SDHCI_CTRL_UHS_MASK) { + case SDHCI_CTRL_UHS_SDR12: + preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12); + break; + case SDHCI_CTRL_UHS_SDR25: + preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25); + break; + case SDHCI_CTRL_UHS_SDR50: + preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50); + break; + case SDHCI_CTRL_UHS_SDR104: + preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104); + break; + case SDHCI_CTRL_UHS_DDR50: + preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50); + break; + default: + pr_warn("%s: Invalid UHS-I mode selected\n", + mmc_hostname(host->mmc)); + preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12); + break; + } + return preset; +} + static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { int div = 0; /* Initialized for compiler warning */ @@ -1106,35 +1138,43 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) goto out; if (host->version >= SDHCI_SPEC_300) { + if (sdhci_readw(host, SDHCI_HOST_CONTROL2) & + SDHCI_CTRL_PRESET_VAL_ENABLE) { + u16 pre_val; + + clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); + pre_val = sdhci_get_preset_value(host); + div = (pre_val & SDHCI_PRESET_SDCLK_FREQ_MASK) + >> SDHCI_PRESET_SDCLK_FREQ_SHIFT; + if (host->clk_mul && + (pre_val & SDHCI_PRESET_CLKGEN_SEL_MASK)) { + clk = SDHCI_PROG_CLOCK_MODE; + real_div = div + 1; + clk_mul = host->clk_mul; + } else { + real_div = max_t(int, 1, div << 1); + } + goto clock_set; + } + /* * Check if the Host Controller supports Programmable Clock * Mode. */ if (host->clk_mul) { - u16 ctrl; - + for (div = 1; div <= 1024; div++) { + if ((host->max_clk * host->clk_mul / div) + <= clock) + break; + } /* - * We need to figure out whether the Host Driver needs - * to select Programmable Clock Mode, or the value can - * be set automatically by the Host Controller based on - * the Preset Value registers. + * Set Programmable Clock Mode in the Clock + * Control register. */ - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); - if (!(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) { - for (div = 1; div <= 1024; div++) { - if (((host->max_clk * host->clk_mul) / - div) <= clock) - break; - } - /* - * Set Programmable Clock Mode in the Clock - * Control register. - */ - clk = SDHCI_PROG_CLOCK_MODE; - real_div = div; - clk_mul = host->clk_mul; - div--; - } + clk = SDHCI_PROG_CLOCK_MODE; + real_div = div; + clk_mul = host->clk_mul; + div--; } else { /* Version 3.00 divisors must be a multiple of 2. */ if (host->max_clk <= clock) @@ -1159,6 +1199,7 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) div >>= 1; } +clock_set: if (real_div) host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div; @@ -1376,6 +1417,10 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_reinit(host); } + if (host->version >= SDHCI_SPEC_300 && + (ios->power_mode == MMC_POWER_UP)) + sdhci_enable_preset_value(host, false); + sdhci_set_clock(host, ios->clock); if (ios->power_mode == MMC_POWER_OFF) @@ -1496,6 +1541,20 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); } + if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + ((ios->timing == MMC_TIMING_UHS_SDR12) || + (ios->timing == MMC_TIMING_UHS_SDR25) || + (ios->timing == MMC_TIMING_UHS_SDR50) || + (ios->timing == MMC_TIMING_UHS_SDR104) || + (ios->timing == MMC_TIMING_UHS_DDR50))) { + u16 preset; + + sdhci_enable_preset_value(host, true); + preset = sdhci_get_preset_value(host); + ios->drv_type = (preset & SDHCI_PRESET_DRV_MASK) + >> SDHCI_PRESET_DRV_SHIFT; + } + /* Re-enable SD Clock */ sdhci_update_clock(host); } else @@ -1925,17 +1984,15 @@ out: return err; } -static void sdhci_do_enable_preset_value(struct sdhci_host *host, bool enable) + +static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable) { u16 ctrl; - unsigned long flags; /* Host Controller v3.00 defines preset value registers */ if (host->version < SDHCI_SPEC_300) return; - spin_lock_irqsave(&host->lock, flags); - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); /* @@ -1951,17 +2008,6 @@ static void sdhci_do_enable_preset_value(struct sdhci_host *host, bool enable) sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); host->flags &= ~SDHCI_PV_ENABLED; } - - spin_unlock_irqrestore(&host->lock, flags); -} - -static void sdhci_enable_preset_value(struct mmc_host *mmc, bool enable) -{ - struct sdhci_host *host = mmc_priv(mmc); - - sdhci_runtime_pm_get(host); - sdhci_do_enable_preset_value(host, enable); - sdhci_runtime_pm_put(host); } static void sdhci_card_event(struct mmc_host *mmc) @@ -1997,7 +2043,6 @@ static const struct mmc_host_ops sdhci_ops = { .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, .execute_tuning = sdhci_execute_tuning, - .enable_preset_value = sdhci_enable_preset_value, .card_event = sdhci_card_event, .card_busy = sdhci_card_busy, }; @@ -2591,8 +2636,12 @@ int sdhci_runtime_resume_host(struct sdhci_host *host) sdhci_do_set_ios(host, &host->mmc->ios); sdhci_do_start_signal_voltage_switch(host, &host->mmc->ios); - if (host_flags & SDHCI_PV_ENABLED) - sdhci_do_enable_preset_value(host, true); + if ((host_flags & SDHCI_PV_ENABLED) && + !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN)) { + spin_lock_irqsave(&host->lock, flags); + sdhci_enable_preset_value(host, true); + spin_unlock_irqrestore(&host->lock, flags); + } /* Set the re-tuning expiration flag */ if (host->flags & SDHCI_USING_RETUNING_TIMER) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index c8d11b904a40..379e09d9f3c1 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -229,6 +229,18 @@ /* 60-FB reserved */ +#define SDHCI_PRESET_FOR_SDR12 0x66 +#define SDHCI_PRESET_FOR_SDR25 0x68 +#define SDHCI_PRESET_FOR_SDR50 0x6A +#define SDHCI_PRESET_FOR_SDR104 0x6C +#define SDHCI_PRESET_FOR_DDR50 0x6E +#define SDHCI_PRESET_DRV_MASK 0xC000 +#define SDHCI_PRESET_DRV_SHIFT 14 +#define SDHCI_PRESET_CLKGEN_SEL_MASK 0x400 +#define SDHCI_PRESET_CLKGEN_SEL_SHIFT 10 +#define SDHCI_PRESET_SDCLK_FREQ_MASK 0x3FF +#define SDHCI_PRESET_SDCLK_FREQ_SHIFT 0 + #define SDHCI_SLOT_INT_STATUS 0xFC #define SDHCI_HOST_VERSION 0xFE diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0373b0a6daac..6c235e03de29 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -136,7 +136,6 @@ struct mmc_host_ops { /* The tuning command opcode value is different for SD and eMMC cards */ int (*execute_tuning)(struct mmc_host *host, u32 opcode); - void (*enable_preset_value)(struct mmc_host *host, bool enable); int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 4bbc3301fbbf..b838ffc49e4a 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -94,6 +94,7 @@ struct sdhci_host { #define SDHCI_QUIRK2_HOST_NO_CMD23 (1<<1) /* The system physically doesn't support 1.8v, even if the host does */ #define SDHCI_QUIRK2_NO_1_8_V (1<<2) +#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN (1<<3) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From abd9ac144947d9a604beb763339e2f77ce8bec79 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 6 Feb 2013 17:01:43 +0900 Subject: mmc: add packed command feature of eMMC4.5 This patch adds packed command feature of eMMC4.5. The maximum number for packing read (or write) is offered and exception event relevant to packed command which is used for error handling is enabled. If host wants to use this feature, MMC_CAP2_PACKED_CMD should be set. Signed-off-by: Seungwon Jeon Reviewed-by: Maya Erez Reviewed-by: Subhash Jadavani Reviewed-by: Namjae Jeon Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 28 ++++++++++++++++++++++++++++ include/linux/mmc/card.h | 3 +++ include/linux/mmc/host.h | 4 ++++ include/linux/mmc/mmc.h | 15 +++++++++++++-- 4 files changed, 48 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8a3ad602a877..c8f3d6e0684e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -538,6 +538,11 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) } else { card->ext_csd.data_tag_unit_size = 0; } + + card->ext_csd.max_packed_writes = + ext_csd[EXT_CSD_MAX_PACKED_WRITES]; + card->ext_csd.max_packed_reads = + ext_csd[EXT_CSD_MAX_PACKED_READS]; } else { card->ext_csd.data_sector_size = 512; } @@ -1275,6 +1280,29 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, } } + /* + * The mandatory minimum values are defined for packed command. + * read: 5, write: 3 + */ + if (card->ext_csd.max_packed_writes >= 3 && + card->ext_csd.max_packed_reads >= 5 && + host->caps2 & MMC_CAP2_PACKED_CMD) { + err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_EXP_EVENTS_CTRL, + EXT_CSD_PACKED_EVENT_EN, + card->ext_csd.generic_cmd6_time); + if (err && err != -EBADMSG) + goto free_card; + if (err) { + pr_warn("%s: Enabling packed event failed\n", + mmc_hostname(card->host)); + card->ext_csd.packed_event_en = 0; + err = 0; + } else { + card->ext_csd.packed_event_en = 1; + } + } + if (!oldcard) host->card = card; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 7069dcea27c5..237f253f2fe0 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -53,6 +53,9 @@ struct mmc_ext_csd { u8 part_config; u8 cache_ctrl; u8 rst_n_function; + u8 max_packed_writes; + u8 max_packed_reads; + u8 packed_event_en; unsigned int part_time; /* Units: ms */ unsigned int sa_timeout; /* Units: 100ns */ unsigned int generic_cmd6_time; /* Units: 10ms */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6c235e03de29..a0466c03f5e1 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -276,6 +276,10 @@ struct mmc_host { #define MMC_CAP2_HC_ERASE_SZ (1 << 9) /* High-capacity erase size */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ +#define MMC_CAP2_PACKED_RD (1 << 12) /* Allow packed read */ +#define MMC_CAP2_PACKED_WR (1 << 13) /* Allow packed write */ +#define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \ + MMC_CAP2_PACKED_WR) mmc_pm_flag_t pm_caps; /* supported pm features */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 94d532e41c61..50bcde3677ca 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -139,7 +139,7 @@ static inline bool mmc_op_multi(u32 opcode) #define R1_CURRENT_STATE(x) ((x & 0x00001E00) >> 9) /* sx, b (4 bits) */ #define R1_READY_FOR_DATA (1 << 8) /* sx, a */ #define R1_SWITCH_ERROR (1 << 7) /* sx, c */ -#define R1_EXCEPTION_EVENT (1 << 6) /* sx, a */ +#define R1_EXCEPTION_EVENT (1 << 6) /* sr, a */ #define R1_APP_CMD (1 << 5) /* sr, c */ #define R1_STATE_IDLE 0 @@ -275,7 +275,10 @@ struct _mmc_csd { #define EXT_CSD_FLUSH_CACHE 32 /* W */ #define EXT_CSD_CACHE_CTRL 33 /* R/W */ #define EXT_CSD_POWER_OFF_NOTIFICATION 34 /* R/W */ -#define EXT_CSD_EXP_EVENTS_STATUS 54 /* RO */ +#define EXT_CSD_PACKED_FAILURE_INDEX 35 /* RO */ +#define EXT_CSD_PACKED_CMD_STATUS 36 /* RO */ +#define EXT_CSD_EXP_EVENTS_STATUS 54 /* RO, 2 bytes */ +#define EXT_CSD_EXP_EVENTS_CTRL 56 /* R/W, 2 bytes */ #define EXT_CSD_DATA_SECTOR_SIZE 61 /* R */ #define EXT_CSD_GP_SIZE_MULT 143 /* R/W */ #define EXT_CSD_PARTITION_ATTRIBUTE 156 /* R/W */ @@ -324,6 +327,8 @@ struct _mmc_csd { #define EXT_CSD_CACHE_SIZE 249 /* RO, 4 bytes */ #define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */ #define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */ +#define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */ +#define EXT_CSD_MAX_PACKED_READS 501 /* RO */ #define EXT_CSD_BKOPS_SUPPORT 502 /* RO */ #define EXT_CSD_HPI_FEATURES 503 /* RO */ @@ -385,6 +390,9 @@ struct _mmc_csd { #define EXT_CSD_PWR_CL_4BIT_MASK 0x0F /* 8 bit PWR CLS */ #define EXT_CSD_PWR_CL_8BIT_SHIFT 4 #define EXT_CSD_PWR_CL_4BIT_SHIFT 0 + +#define EXT_CSD_PACKED_EVENT_EN BIT(3) + /* * EXCEPTION_EVENT_STATUS field */ @@ -393,6 +401,9 @@ struct _mmc_csd { #define EXT_CSD_SYSPOOL_EXHAUSTED BIT(2) #define EXT_CSD_PACKED_FAILURE BIT(3) +#define EXT_CSD_PACKED_GENERIC_ERROR BIT(0) +#define EXT_CSD_PACKED_INDEXED_ERROR BIT(1) + /* * BKOPS status level */ -- cgit v1.2.3 From ce39f9d17c14e56ea6772aa84393e6e0cc8499c4 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Wed, 6 Feb 2013 17:02:46 +0900 Subject: mmc: support packed write command for eMMC4.5 devices This patch supports packed write command of eMMC4.5 devices. Several writes can be grouped in packed command and all data of the individual commands can be sent in a single transfer on the bus. Large amounts of data in one transfer rather than several data of small size are effective for eMMC write internally. As a result, packed command help write throughput be improved. The following tables show the results of packed write. Type A: test none | packed iozone 25.8 | 31 tiotest 27.6 | 31.2 lmdd 31.2 | 35.4 Type B: test none | packed iozone 44.1 | 51.1 tiotest 47.9 | 52.5 lmdd 51.6 | 59.2 Type C: test none | packed iozone 19.5 | 32 tiotest 19.9 | 34.5 lmdd 22.8 | 40.7 Signed-off-by: Seungwon Jeon Reviewed-by: Maya Erez Reviewed-by: Namjae Jeon Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 455 +++++++++++++++++++++++++++++++++++++++++++-- drivers/mmc/card/queue.c | 96 +++++++++- drivers/mmc/card/queue.h | 22 +++ drivers/mmc/core/mmc_ops.c | 1 + include/linux/mmc/card.h | 5 + include/linux/mmc/core.h | 4 + include/linux/mmc/host.h | 5 + 7 files changed, 571 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1170afe1a596..5bab73b91c20 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -59,6 +59,12 @@ MODULE_ALIAS("mmc:block"); #define INAND_CMD38_ARG_SECTRIM2 0x88 #define MMC_BLK_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ +#define mmc_req_rel_wr(req) (((req->cmd_flags & REQ_FUA) || \ + (req->cmd_flags & REQ_META)) && \ + (rq_data_dir(req) == WRITE)) +#define PACKED_CMD_VER 0x01 +#define PACKED_CMD_WR 0x02 + static DEFINE_MUTEX(block_mutex); /* @@ -89,6 +95,7 @@ struct mmc_blk_data { unsigned int flags; #define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ #define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */ +#define MMC_BLK_PACKED_CMD (1 << 2) /* MMC packed command support */ unsigned int usage; unsigned int read_only; @@ -113,6 +120,12 @@ struct mmc_blk_data { static DEFINE_MUTEX(open_lock); +enum { + MMC_PACKED_NR_IDX = -1, + MMC_PACKED_NR_ZERO, + MMC_PACKED_NR_SINGLE, +}; + module_param(perdev_minors, int, 0444); MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device"); @@ -120,6 +133,19 @@ static inline int mmc_blk_part_switch(struct mmc_card *card, struct mmc_blk_data *md); static int get_card_status(struct mmc_card *card, u32 *status, int retries); +static inline void mmc_blk_clear_packed(struct mmc_queue_req *mqrq) +{ + struct mmc_packed *packed = mqrq->packed; + + BUG_ON(!packed); + + mqrq->cmd_type = MMC_PACKED_NONE; + packed->nr_entries = MMC_PACKED_NR_ZERO; + packed->idx_failure = MMC_PACKED_NR_IDX; + packed->retries = 0; + packed->blocks = 0; +} + static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) { struct mmc_blk_data *md; @@ -1137,12 +1163,78 @@ static int mmc_blk_err_check(struct mmc_card *card, if (!brq->data.bytes_xfered) return MMC_BLK_RETRY; + if (mmc_packed_cmd(mq_mrq->cmd_type)) { + if (unlikely(brq->data.blocks << 9 != brq->data.bytes_xfered)) + return MMC_BLK_PARTIAL; + else + return MMC_BLK_SUCCESS; + } + if (blk_rq_bytes(req) != brq->data.bytes_xfered) return MMC_BLK_PARTIAL; return MMC_BLK_SUCCESS; } +static int mmc_blk_packed_err_check(struct mmc_card *card, + struct mmc_async_req *areq) +{ + struct mmc_queue_req *mq_rq = container_of(areq, struct mmc_queue_req, + mmc_active); + struct request *req = mq_rq->req; + struct mmc_packed *packed = mq_rq->packed; + int err, check, status; + u8 *ext_csd; + + BUG_ON(!packed); + + packed->retries--; + check = mmc_blk_err_check(card, areq); + err = get_card_status(card, &status, 0); + if (err) { + pr_err("%s: error %d sending status command\n", + req->rq_disk->disk_name, err); + return MMC_BLK_ABORT; + } + + if (status & R1_EXCEPTION_EVENT) { + ext_csd = kzalloc(512, GFP_KERNEL); + if (!ext_csd) { + pr_err("%s: unable to allocate buffer for ext_csd\n", + req->rq_disk->disk_name); + return -ENOMEM; + } + + err = mmc_send_ext_csd(card, ext_csd); + if (err) { + pr_err("%s: error %d sending ext_csd\n", + req->rq_disk->disk_name, err); + check = MMC_BLK_ABORT; + goto free; + } + + if ((ext_csd[EXT_CSD_EXP_EVENTS_STATUS] & + EXT_CSD_PACKED_FAILURE) && + (ext_csd[EXT_CSD_PACKED_CMD_STATUS] & + EXT_CSD_PACKED_GENERIC_ERROR)) { + if (ext_csd[EXT_CSD_PACKED_CMD_STATUS] & + EXT_CSD_PACKED_INDEXED_ERROR) { + packed->idx_failure = + ext_csd[EXT_CSD_PACKED_FAILURE_INDEX] - 1; + check = MMC_BLK_PARTIAL; + } + pr_err("%s: packed cmd failed, nr %u, sectors %u, " + "failure index: %d\n", + req->rq_disk->disk_name, packed->nr_entries, + packed->blocks, packed->idx_failure); + } +free: + kfree(ext_csd); + } + + return check; +} + static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, int disable_multi, @@ -1297,10 +1389,221 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, mmc_queue_bounce_pre(mqrq); } +static inline u8 mmc_calc_packed_hdr_segs(struct request_queue *q, + struct mmc_card *card) +{ + unsigned int hdr_sz = mmc_large_sector(card) ? 4096 : 512; + unsigned int max_seg_sz = queue_max_segment_size(q); + unsigned int len, nr_segs = 0; + + do { + len = min(hdr_sz, max_seg_sz); + hdr_sz -= len; + nr_segs++; + } while (hdr_sz); + + return nr_segs; +} + +static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req) +{ + struct request_queue *q = mq->queue; + struct mmc_card *card = mq->card; + struct request *cur = req, *next = NULL; + struct mmc_blk_data *md = mq->data; + struct mmc_queue_req *mqrq = mq->mqrq_cur; + bool en_rel_wr = card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN; + unsigned int req_sectors = 0, phys_segments = 0; + unsigned int max_blk_count, max_phys_segs; + bool put_back = true; + u8 max_packed_rw = 0; + u8 reqs = 0; + + if (!(md->flags & MMC_BLK_PACKED_CMD)) + goto no_packed; + + if ((rq_data_dir(cur) == WRITE) && + mmc_host_packed_wr(card->host)) + max_packed_rw = card->ext_csd.max_packed_writes; + + if (max_packed_rw == 0) + goto no_packed; + + if (mmc_req_rel_wr(cur) && + (md->flags & MMC_BLK_REL_WR) && !en_rel_wr) + goto no_packed; + + if (mmc_large_sector(card) && + !IS_ALIGNED(blk_rq_sectors(cur), 8)) + goto no_packed; + + mmc_blk_clear_packed(mqrq); + + max_blk_count = min(card->host->max_blk_count, + card->host->max_req_size >> 9); + if (unlikely(max_blk_count > 0xffff)) + max_blk_count = 0xffff; + + max_phys_segs = queue_max_segments(q); + req_sectors += blk_rq_sectors(cur); + phys_segments += cur->nr_phys_segments; + + if (rq_data_dir(cur) == WRITE) { + req_sectors += mmc_large_sector(card) ? 8 : 1; + phys_segments += mmc_calc_packed_hdr_segs(q, card); + } + + do { + if (reqs >= max_packed_rw - 1) { + put_back = false; + break; + } + + spin_lock_irq(q->queue_lock); + next = blk_fetch_request(q); + spin_unlock_irq(q->queue_lock); + if (!next) { + put_back = false; + break; + } + + if (mmc_large_sector(card) && + !IS_ALIGNED(blk_rq_sectors(next), 8)) + break; + + if (next->cmd_flags & REQ_DISCARD || + next->cmd_flags & REQ_FLUSH) + break; + + if (rq_data_dir(cur) != rq_data_dir(next)) + break; + + if (mmc_req_rel_wr(next) && + (md->flags & MMC_BLK_REL_WR) && !en_rel_wr) + break; + + req_sectors += blk_rq_sectors(next); + if (req_sectors > max_blk_count) + break; + + phys_segments += next->nr_phys_segments; + if (phys_segments > max_phys_segs) + break; + + list_add_tail(&next->queuelist, &mqrq->packed->list); + cur = next; + reqs++; + } while (1); + + if (put_back) { + spin_lock_irq(q->queue_lock); + blk_requeue_request(q, next); + spin_unlock_irq(q->queue_lock); + } + + if (reqs > 0) { + list_add(&req->queuelist, &mqrq->packed->list); + mqrq->packed->nr_entries = ++reqs; + mqrq->packed->retries = reqs; + return reqs; + } + +no_packed: + mqrq->cmd_type = MMC_PACKED_NONE; + return 0; +} + +static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq, + struct mmc_card *card, + struct mmc_queue *mq) +{ + struct mmc_blk_request *brq = &mqrq->brq; + struct request *req = mqrq->req; + struct request *prq; + struct mmc_blk_data *md = mq->data; + struct mmc_packed *packed = mqrq->packed; + bool do_rel_wr, do_data_tag; + u32 *packed_cmd_hdr; + u8 hdr_blocks; + u8 i = 1; + + BUG_ON(!packed); + + mqrq->cmd_type = MMC_PACKED_WRITE; + packed->blocks = 0; + packed->idx_failure = MMC_PACKED_NR_IDX; + + packed_cmd_hdr = packed->cmd_hdr; + memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr)); + packed_cmd_hdr[0] = (packed->nr_entries << 16) | + (PACKED_CMD_WR << 8) | PACKED_CMD_VER; + hdr_blocks = mmc_large_sector(card) ? 8 : 1; + + /* + * Argument for each entry of packed group + */ + list_for_each_entry(prq, &packed->list, queuelist) { + do_rel_wr = mmc_req_rel_wr(prq) && (md->flags & MMC_BLK_REL_WR); + do_data_tag = (card->ext_csd.data_tag_unit_size) && + (prq->cmd_flags & REQ_META) && + (rq_data_dir(prq) == WRITE) && + ((brq->data.blocks * brq->data.blksz) >= + card->ext_csd.data_tag_unit_size); + /* Argument of CMD23 */ + packed_cmd_hdr[(i * 2)] = + (do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) | + (do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) | + blk_rq_sectors(prq); + /* Argument of CMD18 or CMD25 */ + packed_cmd_hdr[((i * 2)) + 1] = + mmc_card_blockaddr(card) ? + blk_rq_pos(prq) : blk_rq_pos(prq) << 9; + packed->blocks += blk_rq_sectors(prq); + i++; + } + + memset(brq, 0, sizeof(struct mmc_blk_request)); + brq->mrq.cmd = &brq->cmd; + brq->mrq.data = &brq->data; + brq->mrq.sbc = &brq->sbc; + brq->mrq.stop = &brq->stop; + + brq->sbc.opcode = MMC_SET_BLOCK_COUNT; + brq->sbc.arg = MMC_CMD23_ARG_PACKED | (packed->blocks + hdr_blocks); + brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; + + brq->cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK; + brq->cmd.arg = blk_rq_pos(req); + if (!mmc_card_blockaddr(card)) + brq->cmd.arg <<= 9; + brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; + + brq->data.blksz = 512; + brq->data.blocks = packed->blocks + hdr_blocks; + brq->data.flags |= MMC_DATA_WRITE; + + brq->stop.opcode = MMC_STOP_TRANSMISSION; + brq->stop.arg = 0; + brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + + mmc_set_data_timeout(&brq->data, card); + + brq->data.sg = mqrq->sg; + brq->data.sg_len = mmc_queue_map_sg(mq, mqrq); + + mqrq->mmc_active.mrq = &brq->mrq; + mqrq->mmc_active.err_check = mmc_blk_packed_err_check; + + mmc_queue_bounce_pre(mqrq); +} + static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, struct mmc_blk_request *brq, struct request *req, int ret) { + struct mmc_queue_req *mq_rq; + mq_rq = container_of(brq, struct mmc_queue_req, brq); + /* * If this is an SD card and we're writing, we can first * mark the known good sectors as ok. @@ -1317,11 +1620,84 @@ static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, ret = blk_end_request(req, 0, blocks << 9); } } else { - ret = blk_end_request(req, 0, brq->data.bytes_xfered); + if (!mmc_packed_cmd(mq_rq->cmd_type)) + ret = blk_end_request(req, 0, brq->data.bytes_xfered); } return ret; } +static int mmc_blk_end_packed_req(struct mmc_queue_req *mq_rq) +{ + struct request *prq; + struct mmc_packed *packed = mq_rq->packed; + int idx = packed->idx_failure, i = 0; + int ret = 0; + + BUG_ON(!packed); + + while (!list_empty(&packed->list)) { + prq = list_entry_rq(packed->list.next); + if (idx == i) { + /* retry from error index */ + packed->nr_entries -= idx; + mq_rq->req = prq; + ret = 1; + + if (packed->nr_entries == MMC_PACKED_NR_SINGLE) { + list_del_init(&prq->queuelist); + mmc_blk_clear_packed(mq_rq); + } + return ret; + } + list_del_init(&prq->queuelist); + blk_end_request(prq, 0, blk_rq_bytes(prq)); + i++; + } + + mmc_blk_clear_packed(mq_rq); + return ret; +} + +static void mmc_blk_abort_packed_req(struct mmc_queue_req *mq_rq) +{ + struct request *prq; + struct mmc_packed *packed = mq_rq->packed; + + BUG_ON(!packed); + + while (!list_empty(&packed->list)) { + prq = list_entry_rq(packed->list.next); + list_del_init(&prq->queuelist); + blk_end_request(prq, -EIO, blk_rq_bytes(prq)); + } + + mmc_blk_clear_packed(mq_rq); +} + +static void mmc_blk_revert_packed_req(struct mmc_queue *mq, + struct mmc_queue_req *mq_rq) +{ + struct request *prq; + struct request_queue *q = mq->queue; + struct mmc_packed *packed = mq_rq->packed; + + BUG_ON(!packed); + + while (!list_empty(&packed->list)) { + prq = list_entry_rq(packed->list.prev); + if (prq->queuelist.prev != &packed->list) { + list_del_init(&prq->queuelist); + spin_lock_irq(q->queue_lock); + blk_requeue_request(mq->queue, prq); + spin_unlock_irq(q->queue_lock); + } else { + list_del_init(&prq->queuelist); + } + } + + mmc_blk_clear_packed(mq_rq); +} + static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) { struct mmc_blk_data *md = mq->data; @@ -1332,10 +1708,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) struct mmc_queue_req *mq_rq; struct request *req = rqc; struct mmc_async_req *areq; + const u8 packed_nr = 2; + u8 reqs = 0; if (!rqc && !mq->mqrq_prev->req) return 0; + if (rqc) + reqs = mmc_blk_prep_packed_list(mq, rqc); + do { if (rqc) { /* @@ -1346,9 +1727,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) (card->ext_csd.data_sector_size == 4096)) { pr_err("%s: Transfer size is not 4KB sector size aligned\n", req->rq_disk->disk_name); + mq_rq = mq->mqrq_cur; goto cmd_abort; } - mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq); + + if (reqs >= packed_nr) + mmc_blk_packed_hdr_wrq_prep(mq->mqrq_cur, + card, mq); + else + mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq); areq = &mq->mqrq_cur->mmc_active; } else areq = NULL; @@ -1372,8 +1759,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) * A block was successfully transferred. */ mmc_blk_reset_success(md, type); - ret = blk_end_request(req, 0, + + if (mmc_packed_cmd(mq_rq->cmd_type)) { + ret = mmc_blk_end_packed_req(mq_rq); + break; + } else { + ret = blk_end_request(req, 0, brq->data.bytes_xfered); + } + /* * If the blk_end_request function returns non-zero even * though all data has been transferred and no errors @@ -1406,7 +1800,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) err = mmc_blk_reset(md, card->host, type); if (!err) break; - if (err == -ENODEV) + if (err == -ENODEV || + mmc_packed_cmd(mq_rq->cmd_type)) goto cmd_abort; /* Fall through */ } @@ -1437,22 +1832,38 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) } if (ret) { - /* - * In case of a incomplete request - * prepare it again and resend. - */ - mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq); - mmc_start_req(card->host, &mq_rq->mmc_active, NULL); + if (mmc_packed_cmd(mq_rq->cmd_type)) { + if (!mq_rq->packed->retries) + goto cmd_abort; + mmc_blk_packed_hdr_wrq_prep(mq_rq, card, mq); + mmc_start_req(card->host, + &mq_rq->mmc_active, NULL); + } else { + + /* + * In case of a incomplete request + * prepare it again and resend. + */ + mmc_blk_rw_rq_prep(mq_rq, card, + disable_multi, mq); + mmc_start_req(card->host, + &mq_rq->mmc_active, NULL); + } } } while (ret); return 1; cmd_abort: - if (mmc_card_removed(card)) - req->cmd_flags |= REQ_QUIET; - while (ret) - ret = blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); + if (mmc_packed_cmd(mq_rq->cmd_type)) { + mmc_blk_abort_packed_req(mq_rq); + } else { + if (mmc_card_removed(card)) + req->cmd_flags |= REQ_QUIET; + while (ret) + ret = blk_end_request(req, -EIO, + blk_rq_cur_bytes(req)); + } start_new_req: if (rqc) { @@ -1460,6 +1871,12 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) rqc->cmd_flags |= REQ_QUIET; blk_end_request_all(rqc, -EIO); } else { + /* + * If current request is packed, it needs to put back. + */ + if (mmc_packed_cmd(mq->mqrq_cur->cmd_type)) + mmc_blk_revert_packed_req(mq, mq->mqrq_cur); + mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq); mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL); @@ -1634,6 +2051,14 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA); } + if (mmc_card_mmc(card) && + (area_type == MMC_BLK_DATA_AREA_MAIN) && + (md->flags & MMC_BLK_CMD23) && + card->ext_csd.packed_event_en) { + if (!mmc_packed_init(&md->queue, card)) + md->flags |= MMC_BLK_PACKED_CMD; + } + return md; err_putdisk: @@ -1742,6 +2167,8 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) /* Then flush out any already in there */ mmc_cleanup_queue(&md->queue); + if (md->flags & MMC_BLK_PACKED_CMD) + mmc_packed_clean(&md->queue); mmc_blk_put(md); } } diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 5e0971016ac5..fa4e44ee7961 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -362,6 +362,49 @@ void mmc_cleanup_queue(struct mmc_queue *mq) } EXPORT_SYMBOL(mmc_cleanup_queue); +int mmc_packed_init(struct mmc_queue *mq, struct mmc_card *card) +{ + struct mmc_queue_req *mqrq_cur = &mq->mqrq[0]; + struct mmc_queue_req *mqrq_prev = &mq->mqrq[1]; + int ret = 0; + + + mqrq_cur->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL); + if (!mqrq_cur->packed) { + pr_warn("%s: unable to allocate packed cmd for mqrq_cur\n", + mmc_card_name(card)); + ret = -ENOMEM; + goto out; + } + + mqrq_prev->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL); + if (!mqrq_prev->packed) { + pr_warn("%s: unable to allocate packed cmd for mqrq_prev\n", + mmc_card_name(card)); + kfree(mqrq_cur->packed); + mqrq_cur->packed = NULL; + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&mqrq_cur->packed->list); + INIT_LIST_HEAD(&mqrq_prev->packed->list); + +out: + return ret; +} + +void mmc_packed_clean(struct mmc_queue *mq) +{ + struct mmc_queue_req *mqrq_cur = &mq->mqrq[0]; + struct mmc_queue_req *mqrq_prev = &mq->mqrq[1]; + + kfree(mqrq_cur->packed); + mqrq_cur->packed = NULL; + kfree(mqrq_prev->packed); + mqrq_prev->packed = NULL; +} + /** * mmc_queue_suspend - suspend a MMC request queue * @mq: MMC queue to suspend @@ -406,6 +449,41 @@ void mmc_queue_resume(struct mmc_queue *mq) } } +static unsigned int mmc_queue_packed_map_sg(struct mmc_queue *mq, + struct mmc_packed *packed, + struct scatterlist *sg, + enum mmc_packed_type cmd_type) +{ + struct scatterlist *__sg = sg; + unsigned int sg_len = 0; + struct request *req; + + if (mmc_packed_wr(cmd_type)) { + unsigned int hdr_sz = mmc_large_sector(mq->card) ? 4096 : 512; + unsigned int max_seg_sz = queue_max_segment_size(mq->queue); + unsigned int len, remain, offset = 0; + u8 *buf = (u8 *)packed->cmd_hdr; + + remain = hdr_sz; + do { + len = min(remain, max_seg_sz); + sg_set_buf(__sg, buf + offset, len); + offset += len; + remain -= len; + (__sg++)->page_link &= ~0x02; + sg_len++; + } while (remain); + } + + list_for_each_entry(req, &packed->list, queuelist) { + sg_len += blk_rq_map_sg(mq->queue, req, __sg); + __sg = sg + (sg_len - 1); + (__sg++)->page_link &= ~0x02; + } + sg_mark_end(sg + (sg_len - 1)); + return sg_len; +} + /* * Prepare the sg list(s) to be handed of to the host driver */ @@ -414,14 +492,26 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) unsigned int sg_len; size_t buflen; struct scatterlist *sg; + enum mmc_packed_type cmd_type; int i; - if (!mqrq->bounce_buf) - return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg); + cmd_type = mqrq->cmd_type; + + if (!mqrq->bounce_buf) { + if (mmc_packed_cmd(cmd_type)) + return mmc_queue_packed_map_sg(mq, mqrq->packed, + mqrq->sg, cmd_type); + else + return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg); + } BUG_ON(!mqrq->bounce_sg); - sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg); + if (mmc_packed_cmd(cmd_type)) + sg_len = mmc_queue_packed_map_sg(mq, mqrq->packed, + mqrq->bounce_sg, cmd_type); + else + sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg); mqrq->bounce_sg_len = sg_len; diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index e20c27b2b8b4..031bf6376c99 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -12,6 +12,23 @@ struct mmc_blk_request { struct mmc_data data; }; +enum mmc_packed_type { + MMC_PACKED_NONE = 0, + MMC_PACKED_WRITE, +}; + +#define mmc_packed_cmd(type) ((type) != MMC_PACKED_NONE) +#define mmc_packed_wr(type) ((type) == MMC_PACKED_WRITE) + +struct mmc_packed { + struct list_head list; + u32 cmd_hdr[1024]; + unsigned int blocks; + u8 nr_entries; + u8 retries; + s16 idx_failure; +}; + struct mmc_queue_req { struct request *req; struct mmc_blk_request brq; @@ -20,6 +37,8 @@ struct mmc_queue_req { struct scatterlist *bounce_sg; unsigned int bounce_sg_len; struct mmc_async_req mmc_active; + enum mmc_packed_type cmd_type; + struct mmc_packed *packed; }; struct mmc_queue { @@ -49,4 +68,7 @@ extern unsigned int mmc_queue_map_sg(struct mmc_queue *, extern void mmc_queue_bounce_pre(struct mmc_queue_req *); extern void mmc_queue_bounce_post(struct mmc_queue_req *); +extern int mmc_packed_init(struct mmc_queue *, struct mmc_card *); +extern void mmc_packed_clean(struct mmc_queue *); + #endif diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 6d8f7012d73a..49f04bc9d0eb 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -363,6 +363,7 @@ int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd) return mmc_send_cxd_data(card, card->host, MMC_SEND_EXT_CSD, ext_csd, 512); } +EXPORT_SYMBOL_GPL(mmc_send_ext_csd); int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp) { diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 237f253f2fe0..61b2c30c903b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -310,6 +310,11 @@ static inline void mmc_part_add(struct mmc_card *card, unsigned int size, card->nr_parts++; } +static inline bool mmc_large_sector(struct mmc_card *card) +{ + return card->ext_csd.data_sector_size == 4096; +} + /* * The world is not perfect and supplies us with broken mmc/sdio devices. * For at least some of these bugs we need a work-around. diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 495d1336149c..39613b9a6fc5 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -18,6 +18,9 @@ struct mmc_request; struct mmc_command { u32 opcode; u32 arg; +#define MMC_CMD23_ARG_REL_WR (1 << 31) +#define MMC_CMD23_ARG_PACKED ((0 << 31) | (1 << 30)) +#define MMC_CMD23_ARG_TAG_REQ (1 << 29) u32 resp[4]; unsigned int flags; /* expected response type */ #define MMC_RSP_PRESENT (1 << 0) @@ -148,6 +151,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); +extern int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); #define MMC_ERASE_ARG 0x00000000 #define MMC_SECURE_ERASE_ARG 0x80000000 diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a0466c03f5e1..fd5fd5a6026f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -465,6 +465,11 @@ static inline int mmc_host_uhs(struct mmc_host *host) MMC_CAP_UHS_DDR50); } +static inline int mmc_host_packed_wr(struct mmc_host *host) +{ + return host->caps2 & MMC_CAP2_PACKED_WR; +} + #ifdef CONFIG_MMC_CLKGATE void mmc_host_clk_hold(struct mmc_host *host); void mmc_host_clk_release(struct mmc_host *host); -- cgit v1.2.3 From 6c56e7a0fff166904ce2715f7ab1746460c1f11b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Sat, 16 Feb 2013 16:21:16 +0100 Subject: mmc: provide a standard MMC device-tree binding parser centrally MMC defines a number of standard DT bindings. Having each driver parse them individually adds code redundancy and is error prone. Provide a standard function to unify the parsing. After all drivers are converted to using it instead of their own parsers, this function can be integrated into mmc_alloc_host(). Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/core/host.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 1 + 2 files changed, 111 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index ee2e16b17017..9c53452e73e1 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,6 +25,7 @@ #include #include +#include #include "core.h" #include "host.h" @@ -294,6 +297,113 @@ static inline void mmc_host_clk_sysfs_init(struct mmc_host *host) #endif +/** + * mmc_of_parse() - parse host's device-tree node + * @host: host whose node should be parsed. + * + * To keep the rest of the MMC subsystem unaware of whether DT has been + * used to to instantiate and configure this host instance or not, we + * parse the properties and set respective generic mmc-host flags and + * parameters. + */ +void mmc_of_parse(struct mmc_host *host) +{ + struct device_node *np; + u32 bus_width; + bool explicit_inv_wp, gpio_inv_wp = false; + enum of_gpio_flags flags; + int len, ret, gpio; + + if (!host->parent || !host->parent->of_node) + return; + + np = host->parent->of_node; + + /* "bus-width" is translated to MMC_CAP_*_BIT_DATA flags */ + if (of_property_read_u32(np, "bus-width", &bus_width) < 0) { + dev_dbg(host->parent, + "\"bus-width\" property is missing, assuming 1 bit.\n"); + bus_width = 1; + } + + switch (bus_width) { + case 8: + host->caps |= MMC_CAP_8_BIT_DATA; + /* Hosts capable of 8-bit transfers can also do 4 bits */ + case 4: + host->caps |= MMC_CAP_4_BIT_DATA; + break; + case 1: + break; + default: + dev_err(host->parent, + "Invalid \"bus-width\" value %ud!\n", bus_width); + } + + /* f_max is obtained from the optional "max-frequency" property */ + of_property_read_u32(np, "max-frequency", &host->f_max); + + /* + * Configure CD and WP pins. They are both by default active low to + * match the SDHCI spec. If GPIOs are provided for CD and / or WP, the + * mmc-gpio helpers are used to attach, configure and use them. If + * polarity inversion is specified in DT, one of MMC_CAP2_CD_ACTIVE_HIGH + * and MMC_CAP2_RO_ACTIVE_HIGH capability-2 flags is set. If the + * "broken-cd" property is provided, the MMC_CAP_NEEDS_POLL capability + * is set. If the "non-removable" property is found, the + * MMC_CAP_NONREMOVABLE capability is set and no card-detection + * configuration is performed. + */ + + /* Parse Card Detection */ + if (of_find_property(np, "non-removable", &len)) { + host->caps |= MMC_CAP_NONREMOVABLE; + } else { + bool explicit_inv_cd, gpio_inv_cd = false; + + explicit_inv_cd = of_property_read_bool(np, "cd-inverted"); + + if (of_find_property(np, "broken-cd", &len)) + host->caps |= MMC_CAP_NEEDS_POLL; + + gpio = of_get_named_gpio_flags(np, "cd-gpios", 0, &flags); + if (gpio_is_valid(gpio)) { + if (!(flags & OF_GPIO_ACTIVE_LOW)) + gpio_inv_cd = true; + + ret = mmc_gpio_request_cd(host, gpio); + if (ret < 0) + dev_err(host->parent, + "Failed to request CD GPIO #%d: %d!\n", + gpio, ret); + else + dev_info(host->parent, "Got CD GPIO #%d.\n", + gpio); + } + + if (explicit_inv_cd ^ gpio_inv_cd) + host->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH; + } + + /* Parse Write Protection */ + explicit_inv_wp = of_property_read_bool(np, "wp-inverted"); + + gpio = of_get_named_gpio_flags(np, "wp-gpios", 0, &flags); + if (gpio_is_valid(gpio)) { + if (!(flags & OF_GPIO_ACTIVE_LOW)) + gpio_inv_wp = true; + + ret = mmc_gpio_request_ro(host, gpio); + if (ret < 0) + dev_err(host->parent, + "Failed to request WP GPIO: %d!\n", ret); + } + if (explicit_inv_wp ^ gpio_inv_wp) + host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; +} + +EXPORT_SYMBOL(mmc_of_parse); + /** * mmc_alloc_host - initialise the per-host structure. * @extra: sizeof private data structure diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index fd5fd5a6026f..bf93c9a6d729 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -368,6 +368,7 @@ extern struct mmc_host *mmc_alloc_host(int extra, struct device *); extern int mmc_add_host(struct mmc_host *); extern void mmc_remove_host(struct mmc_host *); extern void mmc_free_host(struct mmc_host *); +void mmc_of_parse(struct mmc_host *host); static inline void *mmc_priv(struct mmc_host *host) { -- cgit v1.2.3 From 8c9beb117bf31cdb757bc80992281004be8a177b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Feb 2013 16:13:53 +0100 Subject: mmc: (cosmetic) remove "extern" from function declarations The "extern" keyword isn't required in function declarations, remove it. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bf93c9a6d729..d6f20cc6415e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -364,10 +364,10 @@ struct mmc_host { unsigned long private[0] ____cacheline_aligned; }; -extern struct mmc_host *mmc_alloc_host(int extra, struct device *); -extern int mmc_add_host(struct mmc_host *); -extern void mmc_remove_host(struct mmc_host *); -extern void mmc_free_host(struct mmc_host *); +struct mmc_host *mmc_alloc_host(int extra, struct device *); +int mmc_add_host(struct mmc_host *); +void mmc_remove_host(struct mmc_host *); +void mmc_free_host(struct mmc_host *); void mmc_of_parse(struct mmc_host *host); static inline void *mmc_priv(struct mmc_host *host) @@ -381,16 +381,16 @@ static inline void *mmc_priv(struct mmc_host *host) #define mmc_classdev(x) (&(x)->class_dev) #define mmc_hostname(x) (dev_name(&(x)->class_dev)) -extern int mmc_suspend_host(struct mmc_host *); -extern int mmc_resume_host(struct mmc_host *); +int mmc_suspend_host(struct mmc_host *); +int mmc_resume_host(struct mmc_host *); -extern int mmc_power_save_host(struct mmc_host *host); -extern int mmc_power_restore_host(struct mmc_host *host); +int mmc_power_save_host(struct mmc_host *host); +int mmc_power_restore_host(struct mmc_host *host); -extern void mmc_detect_change(struct mmc_host *, unsigned long delay); -extern void mmc_request_done(struct mmc_host *, struct mmc_request *); +void mmc_detect_change(struct mmc_host *, unsigned long delay); +void mmc_request_done(struct mmc_host *, struct mmc_request *); -extern int mmc_cache_ctrl(struct mmc_host *, u8); +int mmc_cache_ctrl(struct mmc_host *, u8); static inline void mmc_signal_sdio_irq(struct mmc_host *host) { -- cgit v1.2.3 From 76a411f9f9423cbc9f62e70173459c5af54323f4 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Feb 2013 16:13:57 +0100 Subject: mmc: sh_mobile_sdhi: remove unused .pdata field The struct sh_mobile_sdhi_info::pdata field was only used for platform- based card detection and isn't used anymore since the migration to GPIO- based MMC slot functions. Remove it. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/sh_mobile_sdhi.c | 4 ---- include/linux/mmc/sh_mobile_sdhi.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 06e1bc98c536..175ab970952a 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -156,7 +156,6 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data = &priv->mmc_data; if (p) { - p->pdata = mmc_data; if (p->init) { ret = p->init(pdev, &sdhi_ops); if (ret) @@ -310,9 +309,6 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) struct sh_mobile_sdhi_info *p = pdev->dev.platform_data; int i = 0, irq; - if (p) - p->pdata = NULL; - tmio_mmc_host_remove(host); while (1) { diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h index b65679ffa880..b76bcf0621f6 100644 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ b/include/linux/mmc/sh_mobile_sdhi.h @@ -4,7 +4,6 @@ #include struct platform_device; -struct tmio_mmc_data; #define SH_MOBILE_SDHI_IRQ_CARD_DETECT "card_detect" #define SH_MOBILE_SDHI_IRQ_SDCARD "sdcard" @@ -26,7 +25,6 @@ struct sh_mobile_sdhi_info { unsigned long tmio_caps2; u32 tmio_ocr_mask; /* available MMC voltages */ unsigned int cd_gpio; - struct tmio_mmc_data *pdata; void (*set_pwr)(struct platform_device *pdev, int state); int (*get_cd)(struct platform_device *pdev); -- cgit v1.2.3 From 27902c14aa2376d53755b6c02e3be671fd890e30 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Feb 2013 16:13:59 +0100 Subject: mmc: tmio: remove unused and deprecated symbols The tmio_mmc_cd_wakeup() inline function has been deprecated since 3.4 and is unused since 3.4 too. Remove them. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- include/linux/mfd/tmio.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index d83af39815ab..99bf3e665997 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -64,12 +64,6 @@ * Some controllers can support SDIO IRQ signalling. */ #define TMIO_MMC_SDIO_IRQ (1 << 2) -/* - * Some platforms can detect card insertion events with controller powered - * down, using a GPIO IRQ, in which case they have to fill in cd_irq, cd_gpio, - * and cd_flags fields of struct tmio_mmc_data. - */ -#define TMIO_MMC_HAS_COLD_CD (1 << 3) /* * Some controllers require waiting for the SD bus to become * idle before writing to some registers. @@ -116,18 +110,6 @@ struct tmio_mmc_data { void (*clk_disable)(struct platform_device *pdev); }; -/* - * This function is deprecated and will be removed soon. Please, convert your - * platform to use drivers/mmc/core/cd-gpio.c - */ -#include -static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata) -{ - if (pdata) - mmc_detect_change(dev_get_drvdata(pdata->dev), - msecs_to_jiffies(100)); -} - /* * data for the NAND controller */ -- cgit v1.2.3 From 0e786102949d7461859c6ce9f39c2c8d28e42db3 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 15 Feb 2013 15:07:19 -0700 Subject: mmc: tegra: assume CONFIG_OF, remove platform data Tegra only supports, and always enables, device tree. Remove all ifdefs and runtime checks for DT support from the driver. Platform data is therefore no longer required. Rework the driver to parse the device tree directly into struct sdhci_tegra. Signed-off-by: Stephen Warren Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-tegra.c | 119 ++++++++++---------------- include/linux/platform_data/mmc-sdhci-tegra.h | 28 ------ 2 files changed, 45 insertions(+), 102 deletions(-) delete mode 100644 include/linux/platform_data/mmc-sdhci-tegra.h (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 5a600a53b876..08b06e9a3a21 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -27,8 +27,6 @@ #include -#include - #include "sdhci-pltfm.h" /* Tegra SDHOST controller vendor register definitions */ @@ -45,8 +43,11 @@ struct sdhci_tegra_soc_data { }; struct sdhci_tegra { - const struct tegra_sdhci_platform_data *plat; const struct sdhci_tegra_soc_data *soc_data; + int cd_gpio; + int wp_gpio; + int power_gpio; + int is_8bit; }; static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) @@ -108,12 +109,11 @@ static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_tegra *tegra_host = pltfm_host->priv; - const struct tegra_sdhci_platform_data *plat = tegra_host->plat; - if (!gpio_is_valid(plat->wp_gpio)) + if (!gpio_is_valid(tegra_host->wp_gpio)) return -1; - return gpio_get_value(plat->wp_gpio); + return gpio_get_value(tegra_host->wp_gpio); } static irqreturn_t carddetect_irq(int irq, void *data) @@ -147,11 +147,10 @@ static int tegra_sdhci_buswidth(struct sdhci_host *host, int bus_width) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_tegra *tegra_host = pltfm_host->priv; - const struct tegra_sdhci_platform_data *plat = tegra_host->plat; u32 ctrl; ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); - if (plat->is_8bit && bus_width == MMC_BUS_WIDTH_8) { + if (tegra_host->is_8bit && bus_width == MMC_BUS_WIDTH_8) { ctrl &= ~SDHCI_CTRL_4BITBUS; ctrl |= SDHCI_CTRL_8BITBUS; } else { @@ -217,31 +216,19 @@ static const struct of_device_id sdhci_tegra_dt_match[] = { }; MODULE_DEVICE_TABLE(of, sdhci_dt_ids); -static struct tegra_sdhci_platform_data *sdhci_tegra_dt_parse_pdata( - struct platform_device *pdev) +static void sdhci_tegra_parse_dt(struct device *dev, + struct sdhci_tegra *tegra_host) { - struct tegra_sdhci_platform_data *plat; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; u32 bus_width; - if (!np) - return NULL; - - plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); - if (!plat) { - dev_err(&pdev->dev, "Can't allocate platform data\n"); - return NULL; - } - - plat->cd_gpio = of_get_named_gpio(np, "cd-gpios", 0); - plat->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0); - plat->power_gpio = of_get_named_gpio(np, "power-gpios", 0); + tegra_host->cd_gpio = of_get_named_gpio(np, "cd-gpios", 0); + tegra_host->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0); + tegra_host->power_gpio = of_get_named_gpio(np, "power-gpios", 0); if (of_property_read_u32(np, "bus-width", &bus_width) == 0 && bus_width == 8) - plat->is_8bit = 1; - - return plat; + tegra_host->is_8bit = 1; } static int sdhci_tegra_probe(struct platform_device *pdev) @@ -250,7 +237,6 @@ static int sdhci_tegra_probe(struct platform_device *pdev) const struct sdhci_tegra_soc_data *soc_data; struct sdhci_host *host; struct sdhci_pltfm_host *pltfm_host; - struct tegra_sdhci_platform_data *plat; struct sdhci_tegra *tegra_host; struct clk *clk; int rc; @@ -263,52 +249,40 @@ static int sdhci_tegra_probe(struct platform_device *pdev) host = sdhci_pltfm_init(pdev, soc_data->pdata); if (IS_ERR(host)) return PTR_ERR(host); - pltfm_host = sdhci_priv(host); - plat = pdev->dev.platform_data; - - if (plat == NULL) - plat = sdhci_tegra_dt_parse_pdata(pdev); - - if (plat == NULL) { - dev_err(mmc_dev(host->mmc), "missing platform data\n"); - rc = -ENXIO; - goto err_no_plat; - } - tegra_host = devm_kzalloc(&pdev->dev, sizeof(*tegra_host), GFP_KERNEL); if (!tegra_host) { dev_err(mmc_dev(host->mmc), "failed to allocate tegra_host\n"); rc = -ENOMEM; - goto err_no_plat; + goto err_alloc_tegra_host; } - - tegra_host->plat = plat; tegra_host->soc_data = soc_data; - pltfm_host->priv = tegra_host; - if (gpio_is_valid(plat->power_gpio)) { - rc = gpio_request(plat->power_gpio, "sdhci_power"); + sdhci_tegra_parse_dt(&pdev->dev, tegra_host); + + if (gpio_is_valid(tegra_host->power_gpio)) { + rc = gpio_request(tegra_host->power_gpio, "sdhci_power"); if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate power gpio\n"); goto err_power_req; } - gpio_direction_output(plat->power_gpio, 1); + gpio_direction_output(tegra_host->power_gpio, 1); } - if (gpio_is_valid(plat->cd_gpio)) { - rc = gpio_request(plat->cd_gpio, "sdhci_cd"); + if (gpio_is_valid(tegra_host->cd_gpio)) { + rc = gpio_request(tegra_host->cd_gpio, "sdhci_cd"); if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate cd gpio\n"); goto err_cd_req; } - gpio_direction_input(plat->cd_gpio); + gpio_direction_input(tegra_host->cd_gpio); - rc = request_irq(gpio_to_irq(plat->cd_gpio), carddetect_irq, + rc = request_irq(gpio_to_irq(tegra_host->cd_gpio), + carddetect_irq, IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, mmc_hostname(host->mmc), host); @@ -319,14 +293,14 @@ static int sdhci_tegra_probe(struct platform_device *pdev) } - if (gpio_is_valid(plat->wp_gpio)) { - rc = gpio_request(plat->wp_gpio, "sdhci_wp"); + if (gpio_is_valid(tegra_host->wp_gpio)) { + rc = gpio_request(tegra_host->wp_gpio, "sdhci_wp"); if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate wp gpio\n"); goto err_wp_req; } - gpio_direction_input(plat->wp_gpio); + gpio_direction_input(tegra_host->wp_gpio); } clk = clk_get(mmc_dev(host->mmc), NULL); @@ -338,9 +312,7 @@ static int sdhci_tegra_probe(struct platform_device *pdev) clk_prepare_enable(clk); pltfm_host->clk = clk; - host->mmc->pm_caps = plat->pm_flags; - - if (plat->is_8bit) + if (tegra_host->is_8bit) host->mmc->caps |= MMC_CAP_8_BIT_DATA; rc = sdhci_add_host(host); @@ -353,19 +325,19 @@ err_add_host: clk_disable_unprepare(pltfm_host->clk); clk_put(pltfm_host->clk); err_clk_get: - if (gpio_is_valid(plat->wp_gpio)) - gpio_free(plat->wp_gpio); + if (gpio_is_valid(tegra_host->wp_gpio)) + gpio_free(tegra_host->wp_gpio); err_wp_req: - if (gpio_is_valid(plat->cd_gpio)) - free_irq(gpio_to_irq(plat->cd_gpio), host); + if (gpio_is_valid(tegra_host->cd_gpio)) + free_irq(gpio_to_irq(tegra_host->cd_gpio), host); err_cd_irq_req: - if (gpio_is_valid(plat->cd_gpio)) - gpio_free(plat->cd_gpio); + if (gpio_is_valid(tegra_host->cd_gpio)) + gpio_free(tegra_host->cd_gpio); err_cd_req: - if (gpio_is_valid(plat->power_gpio)) - gpio_free(plat->power_gpio); + if (gpio_is_valid(tegra_host->power_gpio)) + gpio_free(tegra_host->power_gpio); err_power_req: -err_no_plat: +err_alloc_tegra_host: sdhci_pltfm_free(pdev); return rc; } @@ -375,21 +347,20 @@ static int sdhci_tegra_remove(struct platform_device *pdev) struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_tegra *tegra_host = pltfm_host->priv; - const struct tegra_sdhci_platform_data *plat = tegra_host->plat; int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); sdhci_remove_host(host, dead); - if (gpio_is_valid(plat->wp_gpio)) - gpio_free(plat->wp_gpio); + if (gpio_is_valid(tegra_host->wp_gpio)) + gpio_free(tegra_host->wp_gpio); - if (gpio_is_valid(plat->cd_gpio)) { - free_irq(gpio_to_irq(plat->cd_gpio), host); - gpio_free(plat->cd_gpio); + if (gpio_is_valid(tegra_host->cd_gpio)) { + free_irq(gpio_to_irq(tegra_host->cd_gpio), host); + gpio_free(tegra_host->cd_gpio); } - if (gpio_is_valid(plat->power_gpio)) - gpio_free(plat->power_gpio); + if (gpio_is_valid(tegra_host->power_gpio)) + gpio_free(tegra_host->power_gpio); clk_disable_unprepare(pltfm_host->clk); clk_put(pltfm_host->clk); diff --git a/include/linux/platform_data/mmc-sdhci-tegra.h b/include/linux/platform_data/mmc-sdhci-tegra.h deleted file mode 100644 index 8f8430697686..000000000000 --- a/include/linux/platform_data/mmc-sdhci-tegra.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2009 Palm, Inc. - * Author: Yvonne Yip - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#ifndef __PLATFORM_DATA_TEGRA_SDHCI_H -#define __PLATFORM_DATA_TEGRA_SDHCI_H - -#include - -struct tegra_sdhci_platform_data { - int cd_gpio; - int wp_gpio; - int power_gpio; - int is_8bit; - int pm_flags; -}; - -#endif -- cgit v1.2.3 From 3f6d078d4accfff8b114f968259a060bfdc7c682 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 13:49:08 -0500 Subject: fix compat truncate/ftruncate Signed-off-by: Al Viro --- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/mips/kernel/scall64-o32.S | 4 ++-- arch/parisc/kernel/syscall_table.S | 4 ++-- arch/powerpc/kernel/sys_ppc32.c | 12 ------------ arch/s390/kernel/compat_wrapper.S | 10 ---------- arch/s390/kernel/syscalls.S | 4 ++-- arch/sparc/kernel/sys32.S | 1 - arch/sparc/kernel/systbls_64.S | 4 ++-- arch/x86/syscalls/syscall_32.tbl | 4 ++-- fs/open.c | 15 +++++++++++++++ include/linux/compat.h | 2 ++ 11 files changed, 29 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 8153f1a43f0a..12f22492df4c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -113,8 +113,8 @@ __SYSCALL(88, sys_reboot) __SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */ __SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */ __SYSCALL(91, sys_munmap) -__SYSCALL(92, sys_truncate) -__SYSCALL(93, sys_ftruncate) +__SYSCALL(92, compat_sys_truncate) +__SYSCALL(93, compat_sys_ftruncate) __SYSCALL(94, sys_fchmod) __SYSCALL(95, sys_fchown16) __SYSCALL(96, sys_getpriority) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 063cd0d6ddd2..20b100f9d360 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -284,8 +284,8 @@ sys_call_table: PTR compat_sys_old_readdir PTR sys_mips_mmap /* 4090 */ PTR sys_munmap - PTR sys_truncate - PTR sys_ftruncate + PTR compat_sys_truncate + PTR compat_sys_ftruncate PTR sys_fchmod PTR sys_fchown /* 4095 */ PTR sys_getpriority diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index d0efc0aeb612..884b91b028f0 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -165,8 +165,8 @@ ENTRY_SAME(mmap2) ENTRY_SAME(mmap) /* 90 */ ENTRY_SAME(munmap) - ENTRY_SAME(truncate) - ENTRY_SAME(ftruncate) + ENTRY_COMP(truncate) + ENTRY_COMP(ftruncate) ENTRY_SAME(fchmod) ENTRY_SAME(fchown) /* 95 */ ENTRY_SAME(getpriority) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 5677a36f450b..d0bafc0cdf06 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -146,18 +146,6 @@ asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, (off_t __user *)offset, count); } -long compat_sys_truncate(const char __user * path, u32 length) -{ - /* sign extend length */ - return sys_truncate(path, (int)length); -} - -long compat_sys_ftruncate(int fd, u32 length) -{ - /* sign extend length */ - return sys_ftruncate(fd, (int)length); -} - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 2b1a3a03244f..3c98c4dc5aca 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -325,16 +325,6 @@ ENTRY(sys32_munmap_wrapper) llgfr %r3,%r3 # size_t jg sys_munmap # branch to system call -ENTRY(sys32_truncate_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # long - jg sys_truncate # branch to system call - -ENTRY(sys32_ftruncate_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - jg sys_ftruncate # branch to system call - ENTRY(sys32_fchmod_wrapper) llgfr %r2,%r2 # unsigned int llgfr %r3,%r3 # mode_t diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 0e5262f01007..630b935d1284 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -100,8 +100,8 @@ SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) -SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) -SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) +SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) +SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index be3d65a3c270..240a3cecc11e 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -52,7 +52,6 @@ SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) SIGN2(sys32_sync_file_range, compat_sync_file_range, %o0, %o5) SIGN1(sys32_vmsplice, compat_sys_vmsplice, %o0) -SIGN1(sys32_truncate, sys_truncate, %o1) .globl sys32_mmap2 sys32_mmap2: diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 9ed517c5037b..088134834dab 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -43,8 +43,8 @@ sys_call_table32: /*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall .word sys_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd /*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod - .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys32_truncate -/*130*/ .word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall + .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate +/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall .word sys_nis_syscall, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 /*140*/ .word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit .word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index f51810be1a32..e6d55f0064df 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -98,8 +98,8 @@ 89 i386 readdir sys_old_readdir compat_sys_old_readdir 90 i386 mmap sys_old_mmap sys32_mmap 91 i386 munmap sys_munmap -92 i386 truncate sys_truncate -93 i386 ftruncate sys_ftruncate +92 i386 truncate sys_truncate compat_sys_truncate +93 i386 ftruncate sys_ftruncate compat_sys_ftruncate 94 i386 fchmod sys_fchmod 95 i386 fchown sys_fchown16 96 i386 getpriority sys_getpriority diff --git a/fs/open.c b/fs/open.c index 9b33c0cbfacf..669ba0dd6667 100644 --- a/fs/open.c +++ b/fs/open.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "internal.h" @@ -140,6 +141,13 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) return do_sys_truncate(path, length); } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length) +{ + return do_sys_truncate(path, length); +} +#endif + static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; @@ -195,6 +203,13 @@ SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) return ret; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) +{ + return do_sys_ftruncate(fd, length, 1); +} +#endif + /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length) diff --git a/include/linux/compat.h b/include/linux/compat.h index 59c72048bf20..76a87fb57ac2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -536,6 +536,8 @@ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, -- cgit v1.2.3 From 446d64e3e1154806092ac27de198dff1225797d9 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sun, 24 Feb 2013 23:42:37 -0500 Subject: block: fix part_pack_uuid() build error Commit "85865c1 ima: add policy support for file system uuid" introduced a CONFIG_BLOCK dependency. This patch defines a wrapper called blk_part_pack_uuid(), which returns -EINVAL, when CONFIG_BLOCK is not defined. security/integrity/ima/ima_policy.c:538:4: error: implicit declaration of function 'part_pack_uuid' [-Werror=implicit-function-declaration] Changelog v2: - Reference commit number in patch description Changelog v1: - rename ima_part_pack_uuid() to blk_part_pack_uuid() - resolve scripts/checkpatch.pl warnings Changelog v0: - fix UUID scripts/Lindent msgs Reported-by: Randy Dunlap Reported-by: David Rientjes Signed-off-by: Mimi Zohar Acked-by: David Rientjes Acked-by: Randy Dunlap Cc: Jens Axboe Signed-off-by: James Morris --- include/linux/genhd.h | 10 ++++++++++ security/integrity/ima/ima_policy.c | 11 ++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 79b8bba19363..9f3c275e053e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -231,6 +231,12 @@ static inline void part_pack_uuid(const u8 *uuid_str, u8 *to) } } +static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) +{ + part_pack_uuid(uuid_str, to); + return 0; +} + static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -718,6 +724,10 @@ static inline dev_t blk_lookup_devt(const char *name, int partno) return devt; } +static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) +{ + return -EINVAL; +} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index b27535a13a79..399433ad614e 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -176,7 +176,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, && rule->fsmagic != inode->i_sb->s_magic) return false; if ((rule->flags & IMA_FSUUID) && - memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) + memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) return false; if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid)) return false; @@ -530,14 +530,15 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) ima_log_string(ab, "fsuuid", args[0].from); if (memchr_inv(entry->fsuuid, 0x00, - sizeof(entry->fsuuid))) { + sizeof(entry->fsuuid))) { result = -EINVAL; break; } - part_pack_uuid(args[0].from, entry->fsuuid); - entry->flags |= IMA_FSUUID; - result = 0; + result = blk_part_pack_uuid(args[0].from, + entry->fsuuid); + if (!result) + entry->flags |= IMA_FSUUID; break; case Opt_uid: ima_log_string(ab, "uid", args[0].from); -- cgit v1.2.3 From 804d6a89a5c0b076317966bcbcd7a63d42241831 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:14 +0000 Subject: mlx4: Implement memory windows allocation and deallocation Implement MW allocation and deallocation in mlx4_core and mlx4_ib. Pass down the enable bind flag when registering memory regions. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 12 +++++ drivers/infiniband/hw/mlx4/mr.c | 52 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mr.c | 95 +++++++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 20 ++++++- 4 files changed, 178 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index cce5dde94bc1..9ba0aaf3a58e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -116,6 +116,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_mw { + struct ib_mw ibmw; + struct mlx4_mw mmw; +}; + struct mlx4_ib_fast_reg_page_list { struct ib_fast_reg_page_list ibfrpl; __be64 *mapped_page_list; @@ -533,6 +538,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx4_ib_mw, ibmw); +} + static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) { return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); @@ -581,6 +591,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 254e1cf26439..5adf4c47ee18 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -41,9 +41,19 @@ static u32 convert_access(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } +static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type) +{ + switch (type) { + case IB_MW_TYPE_1: return MLX4_MW_TYPE_1; + case IB_MW_TYPE_2: return MLX4_MW_TYPE_2; + default: return -1; + } +} + struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; @@ -189,6 +199,48 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mw *mw; + int err; + + mw = kmalloc(sizeof(*mw), GFP_KERNEL); + if (!mw) + return ERR_PTR(-ENOMEM); + + err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, + to_mlx4_type(type), &mw->mmw); + if (err) + goto err_free; + + err = mlx4_mw_enable(dev->dev, &mw->mmw); + if (err) + goto err_mw; + + mw->ibmw.rkey = mw->mmw.key; + + return &mw->ibmw; + +err_mw: + mlx4_mw_free(dev->dev, &mw->mmw); + +err_free: + kfree(mw); + + return ERR_PTR(err); +} + +int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) +{ + struct mlx4_ib_mw *mw = to_mmw(ibmw); + + mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); + kfree(mw); + + return 0; +} + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 5e785bdcc694..602ca9bf78e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -654,6 +654,101 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw) +{ + u32 index; + + if ((type == MLX4_MW_TYPE_1 && + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || + (type == MLX4_MW_TYPE_2 && + !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) + return -ENOTSUPP; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + mw->key = hw_index_to_key(index); + mw->pd = pd; + mw->type = type; + mw->enabled = MLX4_MPT_DISABLED; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mw_alloc); + +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned + * off, thus creating a memory window and not a memory region. + */ + mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key)); + mpt_entry->pd_flags = cpu_to_be32(mw->pd); + if (mw->type == MLX4_MW_TYPE_2) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mw->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mw_enable); + +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + int err; + + if (mw->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + + mw->enabled = MLX4_MPT_EN_SW; + } + if (mw->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + mlx4_mpt_release(dev, key_to_hw_index(mw->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mw_free); + int mlx4_init_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e9fe8caaf8bb..67b4695e5940 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -170,6 +170,7 @@ enum { #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { + MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1, MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, @@ -237,7 +238,8 @@ enum { MLX4_PERM_LOCAL_WRITE = 1 << 11, MLX4_PERM_REMOTE_READ = 1 << 12, MLX4_PERM_REMOTE_WRITE = 1 << 13, - MLX4_PERM_ATOMIC = 1 << 14 + MLX4_PERM_ATOMIC = 1 << 14, + MLX4_PERM_BIND_MW = 1 << 15, }; enum { @@ -503,6 +505,18 @@ struct mlx4_mr { int enabled; }; +enum mlx4_mw_type { + MLX4_MW_TYPE_1 = 1, + MLX4_MW_TYPE_2 = 2, +}; + +struct mlx4_mw { + u32 key; + u32 pd; + enum mlx4_mw_type type; + int enabled; +}; + struct mlx4_fmr { struct mlx4_mr mr; struct mlx4_mpt_entry *mpt; @@ -803,6 +817,10 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw); +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw); +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, -- cgit v1.2.3 From 6ff63e194066a1f14aee805366a1d79c541fddae Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:15 +0000 Subject: IB/mlx4: Support memory window binding * Implement memory windows binding in mlx4_ib_post_send. * Implement mlx4_ib_bind_mw by deferring to mlx4_ib_post_send. * Rename MLX4_WQE_FMR_PERM_* flags to MLX4_WQE_FMR_AND_BIND_PERM_*, indicating that they are used both for fast registration work requests, and for memory window bind work requests. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 ++ drivers/infiniband/hw/mlx4/mr.c | 22 ++++++++++++++++++++++ drivers/infiniband/hw/mlx4/qp.c | 35 ++++++++++++++++++++++++++++++++--- include/linux/mlx4/qp.h | 11 ++++++++--- 4 files changed, 64 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9ba0aaf3a58e..f61ec26500c4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -592,6 +592,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 5adf4c47ee18..e471f089ff00 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -231,6 +231,28 @@ err_free: return ERR_PTR(err); } +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + int ret; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_BIND_MW; + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + wr.wr.bind_mw.mw = mw; + wr.wr.bind_mw.bind_info = mw_bind->bind_info; + wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + if (!ret) + mw->rkey = wr.wr.bind_mw.rkey; + + return ret; +} + int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) { struct mlx4_ib_mw *mw = to_mmw(ibmw); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ebadebe83b11..35cced2a4da8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -104,6 +104,7 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), + [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1953,9 +1954,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq static __be32 convert_access(int acc) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | - (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + return (acc & IB_ACCESS_REMOTE_ATOMIC ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } @@ -1981,6 +1985,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) fseg->reserved[1] = 0; } +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +{ + bseg->flags1 = + convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); + bseg->flags2 = 0; + if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); + if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); + bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); + bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); + bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); +} + static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { memset(iseg, 0, sizeof(*iseg)); @@ -2289,6 +2311,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; + case IB_WR_BIND_MW: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_bind_seg(wqe, wr); + wqe += sizeof(struct mlx4_wqe_bind_seg); + size += sizeof(struct mlx4_wqe_bind_seg) / 16; + break; default: /* No extra segments required for sends */ break; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6c8a68c602be..67f46ad6920a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -265,6 +265,11 @@ struct mlx4_wqe_lso_seg { __be32 header[0]; }; +enum mlx4_wqe_bind_seg_flags2 { + MLX4_WQE_BIND_ZERO_BASED = (1 << 30), + MLX4_WQE_BIND_TYPE_2 = (1 << 31), +}; + struct mlx4_wqe_bind_seg { __be32 flags1; __be32 flags2; @@ -277,9 +282,9 @@ struct mlx4_wqe_bind_seg { enum { MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, - MLX4_WQE_FMR_PERM_REMOTE_READ = 1 << 29, - MLX4_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, - MLX4_WQE_FMR_PERM_ATOMIC = 1 << 31 + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29, + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30, + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31 }; struct mlx4_wqe_fmr_seg { -- cgit v1.2.3 From e72837e3e7bae3f182c4ac63c9424e86f1158dd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Feb 2013 17:47:04 -0500 Subject: default SET_PERSONALITY() in linux/elf.h Signed-off-by: Al Viro --- arch/avr32/include/asm/elf.h | 3 --- arch/blackfin/include/asm/elf.h | 3 --- arch/c6x/include/asm/elf.h | 3 --- arch/cris/include/asm/elf.h | 3 --- arch/frv/include/asm/elf.h | 3 --- arch/h8300/include/asm/elf.h | 3 --- arch/hexagon/include/asm/elf.h | 5 ----- arch/ia64/include/asm/elf.h | 3 --- arch/m32r/include/asm/elf.h | 3 --- arch/m68k/include/asm/elf.h | 3 --- arch/mn10300/include/asm/elf.h | 5 ----- arch/openrisc/include/asm/elf.h | 3 --- arch/powerpc/include/asm/elf.h | 2 -- arch/s390/include/asm/elf.h | 5 +---- arch/score/include/asm/elf.h | 5 ----- arch/sparc/include/asm/elf_32.h | 3 --- include/linux/elf.h | 4 ++++ 17 files changed, 5 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h index e2c328739808..d232888b99d5 100644 --- a/arch/avr32/include/asm/elf.h +++ b/arch/avr32/include/asm/elf.h @@ -102,7 +102,4 @@ typedef struct user_fpu_struct elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX_32BIT | (current->personality & (~PER_MASK))) - #endif /* __ASM_AVR32_ELF_H */ diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h index 14bc98ff668f..d15cb9b5d52c 100644 --- a/arch/blackfin/include/asm/elf.h +++ b/arch/blackfin/include/asm/elf.h @@ -132,7 +132,4 @@ do { \ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/c6x/include/asm/elf.h b/arch/c6x/include/asm/elf.h index 32b997126adf..9a4dfc5eb249 100644 --- a/arch/c6x/include/asm/elf.h +++ b/arch/c6x/include/asm/elf.h @@ -77,9 +77,6 @@ do { \ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - /* C6X specific section types */ #define SHT_C6000_UNWIND 0x70000001 #define SHT_C6000_PREEMPTMAP 0x70000002 diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h index 8182f2dc89d0..30ded8fbf592 100644 --- a/arch/cris/include/asm/elf.h +++ b/arch/cris/include/asm/elf.h @@ -86,7 +86,4 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h index 9ccbc80f0b11..2bac6446db41 100644 --- a/arch/frv/include/asm/elf.h +++ b/arch/frv/include/asm/elf.h @@ -137,7 +137,4 @@ do { \ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/h8300/include/asm/elf.h b/arch/h8300/include/asm/elf.h index 41193c396bff..6db71248a82f 100644 --- a/arch/h8300/include/asm/elf.h +++ b/arch/h8300/include/asm/elf.h @@ -54,9 +54,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #define R_H8_NONE 0 #define R_H8_DIR32 1 #define R_H8_DIR32_28 2 diff --git a/arch/hexagon/include/asm/elf.h b/arch/hexagon/include/asm/elf.h index 1ba4b3bff5ed..1f14e082588e 100644 --- a/arch/hexagon/include/asm/elf.h +++ b/arch/hexagon/include/asm/elf.h @@ -216,11 +216,6 @@ do { \ */ #define ELF_PLATFORM (NULL) -#ifdef __KERNEL__ -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) -#endif - #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index b5298eb09adb..5a83c5cc3dc8 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -201,9 +201,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with... */ #define ELF_PLATFORM NULL -#define SET_PERSONALITY(ex) \ - set_personality((current->personality & ~PER_MASK) | PER_LINUX) - #define elf_read_implies_exec(ex, executable_stack) \ ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h index 70896161c636..8acc9da9a15e 100644 --- a/arch/m32r/include/asm/elf.h +++ b/arch/m32r/include/asm/elf.h @@ -128,7 +128,4 @@ typedef elf_fpreg_t elf_fpregset_t; intent than poking at uname or /proc/cpuinfo. */ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif /* _ASM_M32R__ELF_H */ diff --git a/arch/m68k/include/asm/elf.h b/arch/m68k/include/asm/elf.h index f83c1d0a87cf..b1c26de438be 100644 --- a/arch/m68k/include/asm/elf.h +++ b/arch/m68k/include/asm/elf.h @@ -113,7 +113,4 @@ typedef struct user_m68kfp_struct elf_fpregset_t; #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h index 4ebd6b3a0a1e..f592d7a9f032 100644 --- a/arch/mn10300/include/asm/elf.h +++ b/arch/mn10300/include/asm/elf.h @@ -150,9 +150,4 @@ do { \ */ #define ELF_PLATFORM (NULL) -#ifdef __KERNEL__ -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) -#endif - #endif /* _ASM_ELF_H */ diff --git a/arch/openrisc/include/asm/elf.h b/arch/openrisc/include/asm/elf.h index f4aa8a542a22..d334e204bbdd 100644 --- a/arch/openrisc/include/asm/elf.h +++ b/arch/openrisc/include/asm/elf.h @@ -62,7 +62,4 @@ extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt); #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 6abf0a163233..ac9790fc3836 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -103,8 +103,6 @@ do { \ # define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \ (exec_stk == EXSTACK_DEFAULT) : 0) #else -# define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) # define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT) #endif /* __powerpc64__ */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 178ff966a8ba..1bfdf24b85a2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -180,10 +180,7 @@ extern unsigned long elf_hwcap; extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) -#ifndef CONFIG_64BIT -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) -#else /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ diff --git a/arch/score/include/asm/elf.h b/arch/score/include/asm/elf.h index 5d566c7a0af2..6a9421c693ca 100644 --- a/arch/score/include/asm/elf.h +++ b/arch/score/include/asm/elf.h @@ -52,11 +52,6 @@ typedef elf_fpreg_t elf_fpregset_t; #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_SCORE7 -#define SET_PERSONALITY(ex) \ -do { \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \ -} while (0) - struct task_struct; struct pt_regs; diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index ac74a2c98e6d..a24e41fcdde1 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h @@ -128,7 +128,4 @@ typedef struct { #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif /* !(__ASMSPARC_ELF_H) */ diff --git a/include/linux/elf.h b/include/linux/elf.h index 8c9048e33463..40a3c0e01b2b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -10,6 +10,10 @@ Override in asm/elf.h as needed. */ # define elf_read_implies_exec(ex, have_pt_gnu_stack) 0 #endif +#ifndef SET_PERSONALITY +#define SET_PERSONALITY(ex) \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))) +#endif #if ELF_CLASS == ELFCLASS32 -- cgit v1.2.3 From 3dadecce20603aa380023c65e6f55f108fd5e952 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Jan 2013 02:18:08 -0500 Subject: switch vfs_getattr() to struct path Signed-off-by: Al Viro --- drivers/base/devtmpfs.c | 3 ++- drivers/base/firmware_class.c | 2 +- drivers/block/loop.c | 2 +- fs/ecryptfs/ecryptfs_kernel.h | 6 ++++++ fs/ecryptfs/inode.c | 3 +-- fs/nfsd/nfs3proc.c | 5 +---- fs/nfsd/nfs3xdr.c | 10 +++++----- fs/nfsd/nfs4xdr.c | 4 ++-- fs/nfsd/nfsproc.c | 12 +++--------- fs/nfsd/nfsxdr.c | 3 ++- fs/nfsd/vfs.h | 8 ++++++++ fs/stat.c | 13 ++++++------- include/linux/fs.h | 2 +- kernel/module.c | 2 +- 14 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 17cf7cad601e..01fc5b07f951 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -302,7 +302,8 @@ static int handle_remove(const char *nodename, struct device *dev) if (dentry->d_inode) { struct kstat stat; - err = vfs_getattr(parent.mnt, dentry, &stat); + struct path p = {.mnt = parent.mnt, .dentry = dentry}; + err = vfs_getattr(&p, &stat); if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { struct iattr newattrs; /* diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index b392b353be39..a2be09dd7771 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -290,7 +290,7 @@ MODULE_PARM_DESC(path, "customized firmware image search path with a higher prio static noinline_for_stack long fw_file_size(struct file *file) { struct kstat st; - if (vfs_getattr(file->f_path.mnt, file->f_path.dentry, &st)) + if (vfs_getattr(&file->f_path, &st)) return -1; if (!S_ISREG(st.mode)) return -1; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ae1251270624..8031a8cdd698 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1139,7 +1139,7 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) if (lo->lo_state != Lo_bound) return -ENXIO; - error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); + error = vfs_getattr(&file->f_path, &stat); if (error) return error; memset(info, 0, sizeof(*info)); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index cfb4b9fed520..7e2c6f5d7985 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -509,6 +509,12 @@ ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt; } +static inline struct path * +ecryptfs_dentry_to_lower_path(struct dentry *dentry) +{ + return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; +} + static inline void ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index cc7709e7c508..e0f07fb6d56b 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1027,8 +1027,7 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), - ecryptfs_dentry_to_lower(dentry), &lower_stat); + rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat); if (!rc) { fsstack_copy_attr_all(dentry->d_inode, ecryptfs_inode_to_lower(dentry->d_inode)); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1fc02dfdc5c4..401289913130 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -43,7 +43,6 @@ static __be32 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int err; __be32 nfserr; dprintk("nfsd: GETATTR(3) %s\n", @@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, if (nfserr) RETURN_STATUS(nfserr); - err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, &resp->stat); - nfserr = nfserrno(err); + nfserr = fh_getattr(&resp->fh, &resp->stat); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 324c0baf7cda..7af9417be88d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -11,6 +11,7 @@ #include "xdr3.h" #include "auth.h" #include "netns.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -204,10 +205,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode) { - int err; + __be32 err; struct kstat stat; - err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); + err = fh_getattr(fhp, &stat); if (!err) { *p++ = xdr_one; /* attributes follow */ lease_get_mtime(dentry->d_inode, &stat.mtime); @@ -254,13 +255,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) */ void fill_post_wcc(struct svc_fh *fhp) { - int err; + __be32 err; if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); - err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, - &fhp->fh_post_attr); + err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) { fhp->fh_post_saved = 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0dc11586682f..17e70dabe21c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1997,7 +1997,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(path.mnt, path.dentry, stat); + err = vfs_getattr(&path, stat); path_put(&path); return err; } @@ -2050,7 +2050,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out; } - err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); + err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aad6d457b9e8..54c6b3d3cc79 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -26,17 +26,13 @@ static __be32 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } static __be32 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* * Get a file's attributes @@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, &resp->count); if (nfserr) return nfserr; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 979b42106979..bf6d3bccdd98 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -4,6 +4,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ +#include "vfs.h" #include "xdr.h" #include "auth.h" @@ -197,7 +198,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct kstat stat; - vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat); + fh_getattr(fhp, &stat); /* BUG */ return encode_fattr(rqstp, p, fhp, &stat); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 359594c393d2..5b5894159f22 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,7 @@ #define LINUX_NFSD_VFS_H #include "nfsfh.h" +#include "nfsd.h" /* * Flags for nfsd_permission @@ -125,4 +126,11 @@ static inline void fh_drop_write(struct svc_fh *fh) } } +static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +{ + struct path p = {.mnt = fh->fh_export->ex_path.mnt, + .dentry = fh->fh_dentry}; + return nfserrno(vfs_getattr(&p, stat)); +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/stat.c b/fs/stat.c index 14f45459c83d..04ce1ac20d20 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -37,17 +37,17 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) EXPORT_SYMBOL(generic_fillattr); -int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int vfs_getattr(struct path *path, struct kstat *stat) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; int retval; - retval = security_inode_getattr(mnt, dentry); + retval = security_inode_getattr(path->mnt, path->dentry); if (retval) return retval; if (inode->i_op->getattr) - return inode->i_op->getattr(mnt, dentry, stat); + return inode->i_op->getattr(path->mnt, path->dentry, stat); generic_fillattr(inode, stat); return 0; @@ -61,8 +61,7 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) int error = -EBADF; if (f.file) { - error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, - stat); + error = vfs_getattr(&f.file->f_path, stat); fdput(f); } return error; @@ -89,7 +88,7 @@ retry: if (error) goto out; - error = vfs_getattr(path.mnt, path.dentry, stat); + error = vfs_getattr(&path, stat); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ab69777b4d8..7f471520b88b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2468,7 +2468,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); -extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int vfs_getattr(struct path *, struct kstat *); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); diff --git a/kernel/module.c b/kernel/module.c index b10b048367e1..950076eb3273 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2519,7 +2519,7 @@ static int copy_module_from_fd(int fd, struct load_info *info) if (err) goto out; - err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); + err = vfs_getattr(&file->f_path, &stat); if (err) goto out; -- cgit v1.2.3 From ecf3d1f1aa74da0d632b651a2e05a911f60e92c0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 20 Feb 2013 11:19:05 -0500 Subject: vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op The following set of operations on a NFS client and server will cause server# mkdir a client# cd a server# mv a a.bak client# sleep 30 # (or whatever the dir attrcache timeout is) client# stat . stat: cannot stat `.': Stale NFS file handle Obviously, we should not be getting an ESTALE error back there since the inode still exists on the server. The problem is that the lookup code will call d_revalidate on the dentry that "." refers to, because NFS has FS_REVAL_DOT set. nfs_lookup_revalidate will see that the parent directory has changed and will try to reverify the dentry by redoing a LOOKUP. That of course fails, so the lookup code returns ESTALE. The problem here is that d_revalidate is really a bad fit for this case. What we really want to know at this point is whether the inode is still good or not, but we don't really care what name it goes by or whether the dcache is still valid. Add a new d_op->d_weak_revalidate operation and have complete_walk call that instead of d_revalidate. The intent there is to allow for a "weaker" d_revalidate that just checks to see whether the inode is still good. This is also gives us an opportunity to kill off the FS_REVAL_DOT special casing. [AV: changed method name, added note in porting, fixed confusion re having it possibly called from RCU mode (it won't be)] Cc: NeilBrown Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 ++ Documentation/filesystems/porting | 4 ++++ Documentation/filesystems/vfs.txt | 24 +++++++++++++++++++++-- fs/9p/vfs_dentry.c | 1 + fs/9p/vfs_super.c | 2 +- fs/dcache.c | 3 +++ fs/namei.c | 8 ++------ fs/nfs/dir.c | 40 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4super.c | 6 +++--- fs/nfs/super.c | 6 +++--- include/linux/dcache.h | 3 +++ include/linux/fs.h | 1 - 12 files changed, 84 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index f48e0c6b4c42..0706d32a61e6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -10,6 +10,7 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -25,6 +26,7 @@ prototypes: locking rules: rename_lock ->d_lock may block rcu-walk d_revalidate: no no yes (ref-walk) maybe +d_weak_revalidate:no no yes no d_hash no no no maybe d_compare: yes no no maybe d_delete: no yes no no diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0472c31c163b..4db22f6491e0 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -441,3 +441,7 @@ d_make_root() drops the reference to inode if dentry allocation fails. two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that local filesystems can ignore tha argument - they are guaranteed that the object doesn't exist. It's remote/distributed ones that might care... +-- +[mandatory] + FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate() +in your dentry operations instead. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index e3869098163e..bc4b06b3160a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -900,6 +900,7 @@ defined: struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -915,8 +916,13 @@ struct dentry_operations { d_revalidate: called when the VFS needs to revalidate a dentry. This is called whenever a name look-up finds a dentry in the - dcache. Most filesystems leave this as NULL, because all their - dentries in the dcache are valid + dcache. Most local filesystems leave this as NULL, because all their + dentries in the dcache are valid. Network filesystems are different + since things can change on the server without the client necessarily + being aware of it. + + This function should return a positive value if the dentry is still + valid, and zero or a negative error code if it isn't. d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU). If in rcu-walk mode, the filesystem must revalidate the dentry without @@ -927,6 +933,20 @@ struct dentry_operations { If a situation is encountered that rcu-walk cannot handle, return -ECHILD and it will be called again in ref-walk mode. + d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry. + This is called when a path-walk ends at dentry that was not acquired by + doing a lookup in the parent directory. This includes "/", "." and "..", + as well as procfs-style symlinks and mountpoint traversal. + + In this case, we are less concerned with whether the dentry is still + fully correct, but rather that the inode is still valid. As with + d_revalidate, most local filesystems will set this to NULL since their + dcache entries are always valid. + + This function has the same return code semantics as d_revalidate. + + d_weak_revalidate is only called after leaving rcu-walk mode. + d_hash: called when the VFS adds a dentry to the hash table. The first dentry passed to d_hash is the parent directory that the name is to be hashed into. The inode is the dentry's inode. diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 64600b5d0522..9ad68628522c 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -137,6 +137,7 @@ out_valid: const struct dentry_operations v9fs_cached_dentry_operations = { .d_revalidate = v9fs_lookup_revalidate, + .d_weak_revalidate = v9fs_lookup_revalidate, .d_delete = v9fs_cached_dentry_delete, .d_release = v9fs_dentry_release, }; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 137d50396898..91dad63e5a2d 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -363,5 +363,5 @@ struct file_system_type v9fs_fs_type = { .mount = v9fs_mount, .kill_sb = v9fs_kill_super, .owner = THIS_MODULE, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT, + .fs_flags = FS_RENAME_DOES_D_MOVE, }; diff --git a/fs/dcache.c b/fs/dcache.c index ebab049826c0..68220dd0c135 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1358,6 +1358,7 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | + DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE )); dentry->d_op = op; if (!op) @@ -1368,6 +1369,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_COMPARE; if (op->d_revalidate) dentry->d_flags |= DCACHE_OP_REVALIDATE; + if (op->d_weak_revalidate) + dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; if (op->d_delete) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) diff --git a/fs/namei.c b/fs/namei.c index 052c095c2808..dc984fee5532 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -600,14 +600,10 @@ static int complete_walk(struct nameidata *nd) if (likely(!(nd->flags & LOOKUP_JUMPED))) return 0; - if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) + if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) return 0; - if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) - return 0; - - /* Note: we do not d_invalidate() */ - status = d_revalidate(dentry, nd->flags); + status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); if (status > 0) return 0; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a8bd28cde7e2..f23f455be42b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1135,6 +1135,45 @@ out_error: return error; } +/* + * A weaker form of d_revalidate for revalidating just the dentry->d_inode + * when we don't really care about the dentry name. This is called when a + * pathwalk ends on a dentry that was not found via a normal lookup in the + * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals). + * + * In this situation, we just want to verify that the inode itself is OK + * since the dentry might have changed on the server. + */ +static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + int error; + struct inode *inode = dentry->d_inode; + + /* + * I believe we can only get a negative dentry here in the case of a + * procfs-style symlink. Just assume it's correct for now, but we may + * eventually need to do something more here. + */ + if (!inode) { + dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); + return 1; + } + + if (is_bad_inode(inode)) { + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); + return 0; + } + + error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", + __func__, inode->i_ino, error ? "invalid" : "valid"); + return !error; +} + /* * This is called from dput() when d_count is going to 0. */ @@ -1202,6 +1241,7 @@ static void nfs_d_release(struct dentry *dentry) const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 84d2e9e2f313..569b166cc050 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -28,7 +28,7 @@ static struct file_system_type nfs4_remote_fs_type = { .name = "nfs4", .mount = nfs4_remote_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; static struct file_system_type nfs4_remote_referral_fs_type = { @@ -36,7 +36,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = { .name = "nfs4", .mount = nfs4_remote_referral_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; struct file_system_type nfs4_referral_fs_type = { @@ -44,7 +44,7 @@ struct file_system_type nfs4_referral_fs_type = { .name = "nfs4", .mount = nfs4_referral_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; static const struct super_operations nfs4_sops = { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2e7e8c878e5d..92acc26f9c5f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -292,7 +292,7 @@ struct file_system_type nfs_fs_type = { .name = "nfs", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; EXPORT_SYMBOL_GPL(nfs_fs_type); @@ -301,7 +301,7 @@ struct file_system_type nfs_xdev_fs_type = { .name = "nfs", .mount = nfs_xdev_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; const struct super_operations nfs_sops = { @@ -331,7 +331,7 @@ struct file_system_type nfs4_fs_type = { .name = "nfs4", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; EXPORT_SYMBOL_GPL(nfs4_fs_type); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 03d169288423..1a6bb81f0fe5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -145,6 +145,7 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -192,6 +193,8 @@ struct dentry_operations { #define DCACHE_GENOCIDE 0x0200 #define DCACHE_SHRINK_LIST 0x0400 +#define DCACHE_OP_WEAK_REVALIDATE 0x0800 + #define DCACHE_NFSFS_RENAMED 0x1000 /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f471520b88b..da94011ae83c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1807,7 +1807,6 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ -#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); -- cgit v1.2.3 From 182be684784334598eee1d90274e7f7aa0063616 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Jan 2013 02:21:54 -0500 Subject: kill f_vfsmnt very few users left... Signed-off-by: Al Viro --- fs/ocfs2/dir.c | 2 +- fs/ocfs2/file.c | 4 ++-- fs/ocfs2/mmap.c | 2 +- include/linux/fs.h | 1 - security/commoncap.c | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index ac0d4a0e8a41..c87d0793bdec 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2020,7 +2020,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); - error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); + error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); if (lock_level && error >= 0) { /* We release EX lock which used to update atime * and get PR lock again to reduce contention diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 04098af9dbc8..5bcd865905ef 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2526,7 +2526,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, /* * See the comment in ocfs2_file_aio_read() */ - ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); + ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; @@ -2589,7 +2589,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. */ - ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 07c585b85000..10d66c75cecb 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -181,7 +181,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) int ret = 0, lock_level = 0; ret = ocfs2_inode_lock_atime(file_inode(file), - file->f_vfsmnt, &lock_level); + file->f_path.mnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index da94011ae83c..c766afd1e684 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,7 +769,6 @@ struct file { } f_u; struct path f_path; #define f_dentry f_path.dentry -#define f_vfsmnt f_path.mnt const struct file_operations *f_op; /* diff --git a/security/commoncap.c b/security/commoncap.c index 7ee08c756d6b..c44b6fe6648e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -440,7 +440,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c if (!file_caps_enabled) return 0; - if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) + if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) return 0; dentry = dget(bprm->file->f_dentry); -- cgit v1.2.3 From 7bb307e894d51308aa0582a8c4cc5875bbc645b9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Feb 2013 14:51:48 -0500 Subject: export kernel_write(), convert open-coded instances Signed-off-by: Al Viro --- drivers/mtd/nand/nandsim.c | 34 ++++++++++++---------------------- fs/ecryptfs/read_write.c | 6 +----- fs/splice.c | 5 +++-- include/linux/fs.h | 1 + kernel/sysctl_binary.c | 39 +++++++-------------------------------- 5 files changed, 24 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 818b65c85d12..8f30d385bfa3 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -1408,40 +1408,32 @@ static void clear_memalloc(int memalloc) current->flags &= ~PF_MEMALLOC; } -static ssize_t read_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos) +static ssize_t read_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t pos) { - mm_segment_t old_fs; ssize_t tx; int err, memalloc; - err = get_pages(ns, file, count, *pos); + err = get_pages(ns, file, count, pos); if (err) return err; - old_fs = get_fs(); - set_fs(get_ds()); memalloc = set_memalloc(); - tx = vfs_read(file, (char __user *)buf, count, pos); + tx = kernel_read(file, pos, buf, count); clear_memalloc(memalloc); - set_fs(old_fs); put_pages(ns); return tx; } -static ssize_t write_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos) +static ssize_t write_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t pos) { - mm_segment_t old_fs; ssize_t tx; int err, memalloc; - err = get_pages(ns, file, count, *pos); + err = get_pages(ns, file, count, pos); if (err) return err; - old_fs = get_fs(); - set_fs(get_ds()); memalloc = set_memalloc(); - tx = vfs_write(file, (char __user *)buf, count, pos); + tx = kernel_write(file, buf, count, pos); clear_memalloc(memalloc); - set_fs(old_fs); put_pages(ns); return tx; } @@ -1511,7 +1503,7 @@ static void read_page(struct nandsim *ns, int num) if (do_read_error(ns, num)) return; pos = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off; - tx = read_file(ns, ns->cfile, ns->buf.byte, num, &pos); + tx = read_file(ns, ns->cfile, ns->buf.byte, num, pos); if (tx != num) { NS_ERR("read_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx); return; @@ -1573,7 +1565,7 @@ static int prog_page(struct nandsim *ns, int num) u_char *pg_off; if (ns->cfile) { - loff_t off, pos; + loff_t off; ssize_t tx; int all; @@ -1585,8 +1577,7 @@ static int prog_page(struct nandsim *ns, int num) memset(ns->file_buf, 0xff, ns->geom.pgszoob); } else { all = 0; - pos = off; - tx = read_file(ns, ns->cfile, pg_off, num, &pos); + tx = read_file(ns, ns->cfile, pg_off, num, off); if (tx != num) { NS_ERR("prog_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx); return -1; @@ -1595,16 +1586,15 @@ static int prog_page(struct nandsim *ns, int num) for (i = 0; i < num; i++) pg_off[i] &= ns->buf.byte[i]; if (all) { - pos = (loff_t)ns->regs.row * ns->geom.pgszoob; - tx = write_file(ns, ns->cfile, ns->file_buf, ns->geom.pgszoob, &pos); + loff_t pos = (loff_t)ns->regs.row * ns->geom.pgszoob; + tx = write_file(ns, ns->cfile, ns->file_buf, ns->geom.pgszoob, pos); if (tx != ns->geom.pgszoob) { NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx); return -1; } ns->pages_written[ns->regs.row] = 1; } else { - pos = off; - tx = write_file(ns, ns->cfile, pg_off, num, &pos); + tx = write_file(ns, ns->cfile, pg_off, num, off); if (tx != num) { NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx); return -1; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index b2a34a192f4f..6a160539cd23 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -40,16 +40,12 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size) { struct file *lower_file; - mm_segment_t fs_save; ssize_t rc; lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; if (!lower_file) return -EIO; - fs_save = get_fs(); - set_fs(get_ds()); - rc = vfs_write(lower_file, data, size, &offset); - set_fs(fs_save); + rc = kernel_write(lower_file, data, size, offset); mark_inode_dirty_sync(ecryptfs_inode); return rc; } diff --git a/fs/splice.c b/fs/splice.c index 963213d56403..718bd0056384 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -569,7 +569,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec, return res; } -static ssize_t kernel_write(struct file *file, const char *buf, size_t count, +ssize_t kernel_write(struct file *file, const char *buf, size_t count, loff_t pos) { mm_segment_t old_fs; @@ -578,11 +578,12 @@ static ssize_t kernel_write(struct file *file, const char *buf, size_t count, old_fs = get_fs(); set_fs(get_ds()); /* The cast to a user pointer is valid due to the set_fs() */ - res = vfs_write(file, (const char __user *)buf, count, &pos); + res = vfs_write(file, (__force const char __user *)buf, count, &pos); set_fs(old_fs); return res; } +EXPORT_SYMBOL(kernel_write); ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, diff --git a/include/linux/fs.h b/include/linux/fs.h index c766afd1e684..d858363a7c17 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2277,6 +2277,7 @@ static inline void i_readcount_inc(struct inode *inode) extern int do_pipe_flags(int *, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); +extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 5a6384450501..37f240fec37a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -971,7 +971,6 @@ out: static ssize_t bin_intvec(struct file *file, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { - mm_segment_t old_fs = get_fs(); ssize_t copied = 0; char *buffer; ssize_t result; @@ -984,13 +983,10 @@ static ssize_t bin_intvec(struct file *file, if (oldval && oldlen) { unsigned __user *vec = oldval; size_t length = oldlen / sizeof(*vec); - loff_t pos = 0; char *str, *end; int i; - set_fs(KERNEL_DS); - result = vfs_read(file, buffer, BUFSZ - 1, &pos); - set_fs(old_fs); + result = kernel_read(file, 0, buffer, BUFSZ - 1); if (result < 0) goto out_kfree; @@ -1017,7 +1013,6 @@ static ssize_t bin_intvec(struct file *file, if (newval && newlen) { unsigned __user *vec = newval; size_t length = newlen / sizeof(*vec); - loff_t pos = 0; char *str, *end; int i; @@ -1033,9 +1028,7 @@ static ssize_t bin_intvec(struct file *file, str += snprintf(str, end - str, "%lu\t", value); } - set_fs(KERNEL_DS); - result = vfs_write(file, buffer, str - buffer, &pos); - set_fs(old_fs); + result = kernel_write(file, buffer, str - buffer, 0); if (result < 0) goto out_kfree; } @@ -1049,7 +1042,6 @@ out: static ssize_t bin_ulongvec(struct file *file, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { - mm_segment_t old_fs = get_fs(); ssize_t copied = 0; char *buffer; ssize_t result; @@ -1062,13 +1054,10 @@ static ssize_t bin_ulongvec(struct file *file, if (oldval && oldlen) { unsigned long __user *vec = oldval; size_t length = oldlen / sizeof(*vec); - loff_t pos = 0; char *str, *end; int i; - set_fs(KERNEL_DS); - result = vfs_read(file, buffer, BUFSZ - 1, &pos); - set_fs(old_fs); + result = kernel_read(file, 0, buffer, BUFSZ - 1); if (result < 0) goto out_kfree; @@ -1095,7 +1084,6 @@ static ssize_t bin_ulongvec(struct file *file, if (newval && newlen) { unsigned long __user *vec = newval; size_t length = newlen / sizeof(*vec); - loff_t pos = 0; char *str, *end; int i; @@ -1111,9 +1099,7 @@ static ssize_t bin_ulongvec(struct file *file, str += snprintf(str, end - str, "%lu\t", value); } - set_fs(KERNEL_DS); - result = vfs_write(file, buffer, str - buffer, &pos); - set_fs(old_fs); + result = kernel_write(file, buffer, str - buffer, 0); if (result < 0) goto out_kfree; } @@ -1127,19 +1113,15 @@ out: static ssize_t bin_uuid(struct file *file, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { - mm_segment_t old_fs = get_fs(); ssize_t result, copied = 0; /* Only supports reads */ if (oldval && oldlen) { - loff_t pos = 0; char buf[40], *str = buf; unsigned char uuid[16]; int i; - set_fs(KERNEL_DS); - result = vfs_read(file, buf, sizeof(buf) - 1, &pos); - set_fs(old_fs); + result = kernel_read(file, 0, buf, sizeof(buf) - 1); if (result < 0) goto out; @@ -1175,18 +1157,14 @@ out: static ssize_t bin_dn_node_address(struct file *file, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { - mm_segment_t old_fs = get_fs(); ssize_t result, copied = 0; if (oldval && oldlen) { - loff_t pos = 0; char buf[15], *nodep; unsigned long area, node; __le16 dnaddr; - set_fs(KERNEL_DS); - result = vfs_read(file, buf, sizeof(buf) - 1, &pos); - set_fs(old_fs); + result = kernel_read(file, 0, buf, sizeof(buf) - 1); if (result < 0) goto out; @@ -1215,7 +1193,6 @@ static ssize_t bin_dn_node_address(struct file *file, } if (newval && newlen) { - loff_t pos = 0; __le16 dnaddr; char buf[15]; int len; @@ -1232,9 +1209,7 @@ static ssize_t bin_dn_node_address(struct file *file, le16_to_cpu(dnaddr) >> 10, le16_to_cpu(dnaddr) & 0x3ff); - set_fs(KERNEL_DS); - result = vfs_write(file, buf, len, &pos); - set_fs(old_fs); + result = kernel_write(file, buf, len, 0); if (result < 0) goto out; } -- cgit v1.2.3 From 9cc64ceaa8b8e8c874519caee79e18cb35d3ce3e Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Wed, 20 Feb 2013 13:16:01 +1100 Subject: fs/exec.c: make bprm_mm_init() static There is only one user of bprm_mm_init, and it's inside the same file. Signed-off-by: Yuanhan Liu Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/exec.c | 2 +- include/linux/binfmts.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 7b6f4d59b26c..864c50df660a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -355,7 +355,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) * flags, permissions, and offset, so we use temporary values. We'll update * them later in setup_arg_pages(). */ -int bprm_mm_init(struct linux_binprm *bprm) +static int bprm_mm_init(struct linux_binprm *bprm) { int err; struct mm_struct *mm = NULL; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 0530b9860359..c3a09149f793 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -111,7 +111,6 @@ extern int suid_dumpable; extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); -extern int bprm_mm_init(struct linux_binprm *bprm); extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); -- cgit v1.2.3 From 12979354a1d6ef25d86f381e4d5f9e103f29913a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2013 09:15:10 -0800 Subject: libceph: rename ceph_pg -> ceph_pg_v1 Rename the old version this type to distinguish it from the new version. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 2 +- include/linux/ceph/osd_client.h | 2 +- include/linux/ceph/osdmap.h | 7 ++++--- include/linux/ceph/rados.h | 4 ++-- net/ceph/osd_client.c | 2 +- net/ceph/osdmap.c | 18 +++++++++--------- 6 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 3b22150d3e19..e831436d6e68 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int r; /* copy and validate */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 388158ff0cbc..be2867330e23 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -56,7 +56,7 @@ struct ceph_osd_request { struct list_head r_linger_item; struct list_head r_linger_osd; struct ceph_osd *r_osd; - struct ceph_pg r_pgid; + struct ceph_pg_v1 r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_num_pg_osds; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c83a838f89f5..eb4989aa48e8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -28,7 +28,7 @@ struct ceph_pg_pool_info { struct ceph_pg_mapping { struct rb_node node; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int len; int osds[]; }; @@ -118,10 +118,11 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, + struct ceph_pg_v1 pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg pgid); + struct ceph_pg_v1 pgid); extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index b65182aba6f7..e7cece69b13f 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -64,7 +64,7 @@ struct ceph_timespec { * placement group. * we encode this into one __le64. */ -struct ceph_pg { +struct ceph_pg_v1 { __le16 preferred; /* preferred primary osd */ __le16 ps; /* placement seed */ __le32 pool; /* object pool */ @@ -128,7 +128,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) * object layout - how a given object should be stored. */ struct ceph_object_layout { - struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ __le32 ol_stripe_unit; /* for per-object parity, if any */ } __attribute__ ((packed)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 39629b66f3b1..e3ab8d60d080 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 3c61e21611d3..8c89ac25081a 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -350,7 +350,7 @@ bad: * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid * to a set of osds) */ -static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) +static int pgid_cmp(struct ceph_pg_v1 l, struct ceph_pg_v1 r) { u64 a = *(u64 *)&l; u64 b = *(u64 *)&r; @@ -389,7 +389,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, } static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, - struct ceph_pg pgid) + struct ceph_pg_v1 pgid) { struct rb_node *n = root->rb_node; struct ceph_pg_mapping *pg; @@ -411,7 +411,7 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, return NULL; } -static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) +static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg_v1 pgid) { struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); @@ -721,7 +721,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, len, bad); for (i = 0; i < len; i++) { int n, j; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; struct ceph_pg_mapping *pg; ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); @@ -944,7 +944,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_mapping *pg; int j; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; u32 pglen; ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); ceph_decode_copy(p, &pgid, sizeof(pgid)); @@ -1079,7 +1079,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int poolid = le32_to_cpu(fl->fl_pg_pool); struct ceph_pg_pool_info *pool; unsigned int ps; @@ -1108,7 +1108,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout); * Calculate raw osd vector for the given pgid. Return pointer to osd * array, or NULL on failure. */ -static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, int *osds, int *num) { struct ceph_pg_mapping *pg; @@ -1163,7 +1163,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, /* * Return acting set for given pgid. */ -int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, int *acting) { int rawosds[CEPH_PG_MAX_SIZE], *osds; @@ -1184,7 +1184,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, /* * Return primary osd for given pgid, or -1 if none. */ -int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) +int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid) { int rawosds[CEPH_PG_MAX_SIZE], *osds; int i, num = CEPH_PG_MAX_SIZE; -- cgit v1.2.3 From 5b191d9914eb68257f47de9d5bfe099b77f0687c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:38:16 -0800 Subject: libceph: decode into cpu-native ceph_pg type Always decode data into our cpu-native ceph_pg type that has the correct field widths. Limit any remaining uses of ceph_pg_v1 to dealing with the legacy protocol. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 5 +-- include/linux/ceph/osd_client.h | 2 +- include/linux/ceph/osdmap.h | 11 ++++-- net/ceph/debugfs.c | 5 ++- net/ceph/osd_client.c | 9 ++--- net/ceph/osdmap.c | 78 +++++++++++++++++++++++------------------ 6 files changed, 62 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e831436d6e68..fb036ed3e129 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; int r; /* copy and validate */ @@ -212,7 +212,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid = ol.ol_pgid; + pgid.pool = le32_to_cpu(ol.ol_pgid.pool); + pgid.seed = le16_to_cpu(ol.ol_pgid.ps); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index be2867330e23..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -56,7 +56,7 @@ struct ceph_osd_request { struct list_head r_linger_item; struct list_head r_linger_osd; struct ceph_osd *r_osd; - struct ceph_pg_v1 r_pgid; + struct ceph_pg r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_num_pg_osds; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index eb4989aa48e8..8a612df4c248 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -18,6 +18,11 @@ * The map can be updated either via an incremental map (diff) describing * the change between two successive epochs, or as a fully encoded map. */ +struct ceph_pg { + uint64_t pool; + uint32_t seed; +}; + struct ceph_pg_pool_info { struct rb_node node; int id; @@ -28,7 +33,7 @@ struct ceph_pg_pool_info { struct ceph_pg_mapping { struct rb_node node; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; int len; int osds[]; }; @@ -119,10 +124,10 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, - struct ceph_pg_v1 pgid, + struct ceph_pg pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg_v1 pgid); + struct ceph_pg pgid); extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 38b5dc1823d4..61a9af634f8b 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -131,10 +131,9 @@ static int osdc_show(struct seq_file *s, void *pp) req = rb_entry(p, struct ceph_osd_request, r_node); - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid, req->r_osd ? req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); + req->r_pgid.pool, req->r_pgid.seed); head = req->r_request->front.iov_base; op = (void *)(head + 1); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e3ab8d60d080..1990834e518b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; @@ -926,7 +926,8 @@ static int __map_request(struct ceph_osd_client *osdc, list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid = reqhead->layout.ol_pgid; + pgid.pool = le32_to_cpu(reqhead->layout.ol_pgid.pool); + pgid.seed = le16_to_cpu(reqhead->layout.ol_pgid.ps); req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -943,8 +944,8 @@ static int __map_request(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", - req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, + dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", + req->r_tid, pgid.pool, pgid.seed, o, req->r_osd ? req->r_osd->o_osd : -1); /* record full pg acting set */ diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 8c89ac25081a..81118db5bd11 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -350,14 +350,15 @@ bad: * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid * to a set of osds) */ -static int pgid_cmp(struct ceph_pg_v1 l, struct ceph_pg_v1 r) +static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { - u64 a = *(u64 *)&l; - u64 b = *(u64 *)&r; - - if (a < b) + if (l.pool < r.pool) + return -1; + if (l.pool > r.pool) + return 1; + if (l.seed < r.seed) return -1; - if (a > b) + if (l.seed > r.seed) return 1; return 0; } @@ -389,7 +390,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, } static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, - struct ceph_pg_v1 pgid) + struct ceph_pg pgid) { struct rb_node *n = root->rb_node; struct ceph_pg_mapping *pg; @@ -403,25 +404,26 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, } else if (c > 0) { n = n->rb_right; } else { - dout("__lookup_pg_mapping %llx got %p\n", - *(u64 *)&pgid, pg); + dout("__lookup_pg_mapping %lld.%x got %p\n", + pgid.pool, pgid.seed, pg); return pg; } } return NULL; } -static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg_v1 pgid) +static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) { struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); if (pg) { - dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed, + pg); rb_erase(&pg->node, root); kfree(pg); return 0; } - dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); return -ENOENT; } @@ -721,11 +723,14 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, len, bad); for (i = 0; i < len; i++) { int n, j; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; + struct ceph_pg_v1 pgid_v1; struct ceph_pg_mapping *pg; ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -743,7 +748,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = __insert_pg_mapping(pg, &map->pg_temp); if (err) goto bad; - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, + len); } /* crush */ @@ -944,10 +950,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_mapping *pg; int j; - struct ceph_pg_v1 pgid; + struct ceph_pg_v1 pgid_v1; + struct ceph_pg pgid; u32 pglen; ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); pglen = ceph_decode_32(p); if (pglen) { @@ -973,8 +982,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, kfree(pg); goto bad; } - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, - pglen); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, + pgid.seed, pglen); } else { /* remove */ __remove_pg_mapping(&map->pg_temp, pgid); @@ -1079,26 +1088,25 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg_v1 pgid; - int poolid = le32_to_cpu(fl->fl_pg_pool); + struct ceph_pg pgid; struct ceph_pg_pool_info *pool; - unsigned int ps; BUG_ON(!osdmap); - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pgid.pool = le32_to_cpu(fl->fl_pg_pool); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return -EIO; - ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); + pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); num = le32_to_cpu(pool->v.pg_num); num_mask = pool->pg_num_mask; - pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(-1); - pgid.pool = fl->fl_pg_pool; - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); + dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, + pgid.seed); - ol->ol_pgid = pgid; + ol->ol_pgid.ps = cpu_to_le16(pgid.seed); + ol->ol_pgid.pool = fl->fl_pg_pool; + ol->ol_pgid.preferred = cpu_to_le16(-1); ol->ol_stripe_unit = fl->fl_object_stripe_unit; return 0; } @@ -1108,7 +1116,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout); * Calculate raw osd vector for the given pgid. Return pointer to osd * array, or NULL on failure. */ -static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, +static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, int *osds, int *num) { struct ceph_pg_mapping *pg; @@ -1116,8 +1124,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, int ruleno; unsigned int poolid, ps, pps, t, r; - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); + poolid = pgid.pool; + ps = pgid.seed; pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); if (!pool) @@ -1126,7 +1134,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, /* pg_temp? */ t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), pool->pgp_num_mask); - pgid.ps = cpu_to_le16(t); + pgid.seed = t; pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1163,7 +1171,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, /* * Return acting set for given pgid. */ -int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, int *acting) { int rawosds[CEPH_PG_MAX_SIZE], *osds; @@ -1184,7 +1192,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid, /* * Return primary osd for given pgid, or -1 if none. */ -int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid) +int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) { int rawosds[CEPH_PG_MAX_SIZE], *osds; int i, num = CEPH_PG_MAX_SIZE; -- cgit v1.2.3 From ec73a754989c27628c9037887df919561280519c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Feb 2013 14:23:07 -0600 Subject: ceph: update "ceph_features.h" This updates "include/linux/ceph/ceph_features.h" so all the feature bits defined in the user space code are defined here. The features supported by this implementation will still differ so that's not updated here. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 2160aab482f6..9e0f5a8ba247 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -12,12 +12,28 @@ #define CEPH_FEATURE_MONNAMES (1<<5) #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) -/* bits 8-17 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) +#define CEPH_FEATURE_PGID64 (1<<9) +#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) +#define CEPH_FEATURE_PGPOOL3 (1<<11) +#define CEPH_FEATURE_OSDREPLYMUX (1<<12) +#define CEPH_FEATURE_OSDENC (1<<13) +#define CEPH_FEATURE_OMAP (1<<14) +#define CEPH_FEATURE_MONENC (1<<15) +#define CEPH_FEATURE_QUERY_T (1<<16) +#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) -/* bits 19-24 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) +#define CEPH_FEATURE_MON_NULLROUTE (1<<20) +#define CEPH_FEATURE_MON_GV (1<<21) +#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) +#define CEPH_FEATURE_MSG_AUTH (1<<23) +#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) #define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) -/* bit 26 defined by user-space; not supported yet here */ -#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_OSD_HBMSGS (1<<28) +#define CEPH_FEATURE_MDSENC (1<<29) /* * Features supported. -- cgit v1.2.3 From 4f6a7e5ee1393ec4b243b39dac9f36992d161540 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:41:09 -0800 Subject: ceph: update support for PGID64, PGPOOL3, OSDENC protocol features Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features. These have been present in ceph.git since v0.42, Feb 2012. Require these features to simplify support; nobody is running older userspace. Note that the new request and reply encoding is still not in place, so the new code is not yet functional. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/mdsmap.c | 12 ++- include/linux/ceph/ceph_features.h | 14 +++- include/linux/ceph/mdsmap.h | 4 +- include/linux/ceph/osdmap.h | 16 +++- include/linux/ceph/rados.h | 23 ------ net/ceph/ceph_common.c | 6 +- net/ceph/debugfs.c | 6 +- net/ceph/osdmap.c | 162 ++++++++++++++++++++----------------- 8 files changed, 124 insertions(+), 119 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 73b7d44e8a35..0d3c9240c61b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); ceph_decode_16_safe(p, end, version, bad); + if (version > 3) { + pr_warning("got mdsmap version %d > 3, failing", version); + goto bad; + } ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); m->m_epoch = ceph_decode_32(p); @@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); m->m_num_data_pg_pools = n; - m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); + m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); if (!m->m_data_pg_pools) goto badmem; - ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); + ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); for (i = 0; i < n; i++) - m->m_data_pg_pools[i] = ceph_decode_32(p); - m->m_cas_pg_pool = ceph_decode_32(p); + m->m_data_pg_pools[i] = ceph_decode_64(p); + m->m_cas_pg_pool = ceph_decode_64(p); /* ok, we don't care about the rest. */ dout("mdsmap_decode success epoch %u\n", m->m_epoch); diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 9e0f5a8ba247..ab0a54286e0d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -39,11 +39,17 @@ * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES | \ - CEPH_FEATURE_CRUSH_TUNABLES2 | \ + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC) #endif diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index cb15b5d867c7..87ed09f54800 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -29,8 +29,8 @@ struct ceph_mdsmap { /* which object pools file data can be stored in */ int m_num_data_pg_pools; - u32 *m_data_pg_pools; - u32 m_cas_pg_pool; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; }; static inline struct ceph_entity_addr * diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8a612df4c248..8587746b7f0e 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -25,12 +25,22 @@ struct ceph_pg { struct ceph_pg_pool_info { struct rb_node node; - int id; - struct ceph_pg_pool v; - int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + s64 id; + u8 type; + u8 size; + u8 crush_ruleset; + u8 object_hash; + u32 pg_num, pgp_num; + int pg_num_mask, pgp_num_mask; + u64 flags; char *name; }; +struct ceph_object_locator { + uint64_t pool; + char *key; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index e7cece69b13f..d784c8dfb09a 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -8,14 +8,6 @@ #include -/* - * osdmap encoding versions - */ -#define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 6 -#define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 6 - /* * fs id */ @@ -91,21 +83,6 @@ struct ceph_pg_v1 { #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 -#define CEPH_PG_POOL_VERSION 2 -struct ceph_pg_pool { - __u8 type; /* CEPH_PG_TYPE_* */ - __u8 size; /* number of osds in each pg */ - __u8 crush_ruleset; /* crush placement rule */ - __u8 object_hash; /* hash mapping object name to ps */ - __le32 pg_num, pgp_num; /* number of pg's */ - __le32 lpg_num, lpgp_num; /* number of localized pg's */ - __le32 last_change; /* most recent epoch changed */ - __le64 snap_seq; /* seq for per-pool snapshot */ - __le32 snap_epoch; /* epoch of last snap */ - __le32 num_snaps; - __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ - __le64 auid; /* who owns the pg */ -} __attribute__ ((packed)); /* * stable_mod func is used to control number of placement groups. diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index c236c235c4a2..c5605ae96714 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -601,10 +601,8 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + pr_info("loaded (mon/osd proto %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 61a9af634f8b..f4d4b27d6026 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p) for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); + seq_printf(s, "pg_pool %llu pg_num %d / %d\n", + (unsigned long long)pool->id, pool->pg_num, + pool->pg_num_mask); } for (i = 0; i < client->osdc.osdmap->max_osd; i++) { struct ceph_entity_addr *addr = diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 81118db5bd11..911919320d2e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t) */ static void calc_pg_masks(struct ceph_pg_pool_info *pi) { - pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; - pi->pgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; - pi->lpg_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; - pi->lpgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; + pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; + pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; } /* @@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) return 0; } -static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) +static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) { struct ceph_pg_pool_info *pi; struct rb_node *n = root->rb_node; @@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { - unsigned int n, m; + u8 ev, cv; + unsigned len, num; + void *pool_end; + + ceph_decode_need(p, end, 2 + 4, bad); + ev = ceph_decode_8(p); /* encoding version */ + cv = ceph_decode_8(p); /* compat version */ + if (ev < 5) { + pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + if (cv > 7) { + pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + len = ceph_decode_32(p); + ceph_decode_need(p, end, len, bad); + pool_end = *p + len; - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + pi->type = ceph_decode_8(p); + pi->size = ceph_decode_8(p); + pi->crush_ruleset = ceph_decode_8(p); + pi->object_hash = ceph_decode_8(p); - /* num_snaps * snap_info_t */ - n = le32_to_cpu(pi->v.num_snaps); - while (n--) { - ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + - sizeof(struct ceph_timespec), bad); - *p += sizeof(u64) + /* key */ - 1 + sizeof(u64) + /* u8, snapid */ - sizeof(struct ceph_timespec); - m = ceph_decode_32(p); /* snap name */ - *p += m; + pi->pg_num = ceph_decode_32(p); + pi->pgp_num = ceph_decode_32(p); + + *p += 4 + 4; /* skip lpg* */ + *p += 4; /* skip last_change */ + *p += 8 + 4; /* skip snap_seq, snap_epoch */ + + /* skip snaps */ + num = ceph_decode_32(p); + while (num--) { + *p += 8; /* snapid key */ + *p += 1 + 1; /* versions */ + len = ceph_decode_32(p); + *p += len; } - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; + /* skip removed snaps */ + num = ceph_decode_32(p); + *p += num * (8 + 8); + + *p += 8; /* skip auid */ + pi->flags = ceph_decode_64(p); + + /* ignore the rest */ + + *p = pool_end; + calc_pg_masks(pi); return 0; bad: @@ -535,14 +563,15 @@ bad: static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; - u32 num, len, pool; + u32 num, len; + u64 pool; ceph_decode_32_safe(p, end, num, bad); dout(" %d pool names\n", num); while (num--) { - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); - dout(" pool %d len %d\n", pool, len); + dout(" pool %llu len %d\n", pool, len); ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { @@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_osdmap *map; u16 version; u32 len, max, i; - u8 ev; int err = -EINVAL; void *start = *p; struct ceph_pg_pool_info *pi; @@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map->pg_temp = RB_ROOT; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_VERSION) { - pr_warning("got unknown v %d > %d of osdmap\n", version, - CEPH_OSDMAP_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > 6 of osdmap\n", version); + goto bad; + } + if (version < 6) { + pr_warning("got old v %d < 6 of osdmap\n", version); goto bad; } @@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { - ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + ceph_decode_need(p, end, 8 + 2, bad); err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; - pi->id = ceph_decode_32(p); - err = -EINVAL; - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - kfree(pi); - goto bad; - } + pi->id = ceph_decode_64(p); err = __decode_pool(p, end, pi); if (err < 0) { kfree(pi); @@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); - goto bad; - } + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; - u32 len, pool; - __s32 new_pool_max, new_flags, max; + s32 len; + u64 pool; + __s64 new_pool_max; + __s32 new_flags, max; void *start = *p; int err = -EINVAL; u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_INC_VERSION) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, - CEPH_OSDMAP_INC_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); goto bad; } @@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, epoch = ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); ceph_decode_copy(p, &modified, sizeof(modified)); - new_pool_max = ceph_decode_32(p); + new_pool_max = ceph_decode_64(p); new_flags = ceph_decode_32(p); /* full map? */ @@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new_pool */ ceph_decode_32_safe(p, end, len, bad); while (len--) { - __u8 ev; struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); - ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - err = -EINVAL; - goto bad; - } + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { pi = kzalloc(sizeof(*pi), GFP_NOFS); @@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); @@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return -EIO; - pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - num = le32_to_cpu(pool->v.pg_num); + pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); + num = pool->pg_num; num_mask = pool->pg_num_mask; dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, @@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, return NULL; /* pg_temp? */ - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); + t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); pgid.seed = t; pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { @@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); if (ruleno < 0) { pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->v.crush_ruleset, pool->v.type, - pool->v.size); + poolid, pool->crush_ruleset, pool->type, + pool->size); return NULL; } - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); + pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); pps += poolid; r = crush_do_rule(osdmap->crush, ruleno, pps, osds, - min_t(int, pool->v.size, *num), + min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + " size %d\n", r, poolid, pool->crush_ruleset, + pool->type, pool->size); return NULL; } *num = r; -- cgit v1.2.3 From 2169aea649c08374bec7d220a3b8f64712275356 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:13:08 -0800 Subject: libceph: calculate placement based on the internal data types Instead of using the old ceph_object_layout struct, update our internal ceph_calc_object_layout method to use the ceph_pg type. This allows us to pass the full 32-bit precision of the pgid.seed to the callers. It also allows some callers to avoid reaching into the request structures for the struct ceph_object_layout fields. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 5 +---- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/osdmap.h | 2 +- net/ceph/osd_client.c | 11 +++++++---- net/ceph/osdmap.c | 18 +++++------------- 5 files changed, 15 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index fb036ed3e129..7d85991fd647 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; u64 len = 1, olen; u64 tmp; - struct ceph_object_layout ol; struct ceph_pg pgid; int r; @@ -209,11 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, + ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid.pool = le32_to_cpu(ol.ol_pgid.pool); - pgid.seed = le16_to_cpu(ol.ol_pgid.ps); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 388158ff0cbc..ad8899fc3157 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -82,6 +82,7 @@ struct ceph_osd_request { char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; + u64 r_snapid; unsigned long r_stamp; /* send OR check time */ struct ceph_file_layout r_file_layout; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8587746b7f0e..35985125f118 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -129,7 +129,7 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_object_layout *ol, +extern int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1990834e518b..5584f0a08e28 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -913,21 +913,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, + err = ceph_calc_object_layout(&pgid, req->r_oid, &req->r_file_layout, osdc->osdmap); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid.pool = le32_to_cpu(reqhead->layout.ol_pgid.pool); - pgid.seed = le16_to_cpu(reqhead->layout.ol_pgid.ps); req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -1000,10 +997,16 @@ static void __send_request(struct ceph_osd_client *osdc, req, req->r_tid, req->r_osd->o_osd, req->r_flags); reqhead = req->r_request->front.iov_base; + reqhead->snapid = cpu_to_le64(req->r_snapid); reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ reqhead->reassert_version = req->r_reassert_version; + reqhead->layout.ol_pgid.ps = cpu_to_le16(req->r_pgid.seed); + reqhead->layout.ol_pgid.pool = cpu_to_le32(req->r_pgid.pool); + reqhead->layout.ol_pgid.preferred = cpu_to_le16(-1); + reqhead->layout.ol_stripe_unit = 0; + req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 911919320d2e..378471644501 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1095,32 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_object_layout *ol, +int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg pgid; struct ceph_pg_pool_info *pool; BUG_ON(!osdmap); - - pgid.pool = le32_to_cpu(fl->fl_pg_pool); - pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); + pg->pool = le32_to_cpu(fl->fl_pg_pool); + pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); if (!pool) return -EIO; - pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); + pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); num = pool->pg_num; num_mask = pool->pg_num_mask; - dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, - pgid.seed); - - ol->ol_pgid.ps = cpu_to_le16(pgid.seed); - ol->ol_pgid.pool = fl->fl_pg_pool; - ol->ol_pgid.preferred = cpu_to_le16(-1); - ol->ol_stripe_unit = fl->fl_object_stripe_unit; + dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); return 0; } EXPORT_SYMBOL(ceph_calc_object_layout); -- cgit v1.2.3 From 1b83bef24c6746a146d39915a18fb5425f2facb0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:11:12 -0800 Subject: libceph: update osd request/reply encoding Use the new version of the encoding for osd requests and replies. In the process, update the way we are tracking request ops and reply lengths and results in the struct ceph_osd_request. Update the rbd and fs/ceph users appropriately. The main changes are: - we keep pointers into the request memory for fields we need to update each time the request is sent out over the wire - we keep information about the result in an array in the request struct where the users can easily get at it. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- drivers/block/rbd.c | 52 +++++---- fs/ceph/addr.c | 31 ++---- include/linux/ceph/osd_client.h | 19 +++- include/linux/ceph/rados.h | 38 ------- net/ceph/debugfs.c | 18 +--- net/ceph/osd_client.c | 233 +++++++++++++++++++++++++++++----------- 6 files changed, 222 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 22085e86a409..6c81a4c040b9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -196,7 +196,7 @@ struct rbd_obj_request { u64 xferred; /* bytes transferred */ u64 version; - s32 result; + int result; atomic_t done; rbd_obj_callback_t callback; @@ -1282,12 +1282,19 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) { - dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, obj_request->result, obj_request->xferred, obj_request->length); - if (obj_request->result == (s32) -ENOENT) { + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { zero_bio_chain(obj_request->bio_list, 0); obj_request->result = 0; + obj_request->xferred = obj_request->length; } else if (obj_request->xferred < obj_request->length && !obj_request->result) { zero_bio_chain(obj_request->bio_list, obj_request->xferred); @@ -1298,20 +1305,14 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, - obj_request->result, obj_request->xferred, obj_request->length); - - /* A short write really shouldn't occur. Warn if we see one */ - - if (obj_request->xferred != obj_request->length) { - struct rbd_img_request *img_request = obj_request->img_request; - struct rbd_device *rbd_dev; - - rbd_dev = img_request ? img_request->rbd_dev : NULL; - rbd_warn(rbd_dev, "wrote %llu want %llu\n", - obj_request->xferred, obj_request->length); - } - + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; obj_request_done_set(obj_request); } @@ -1329,9 +1330,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, struct ceph_msg *msg) { struct rbd_obj_request *obj_request = osd_req->r_priv; - struct ceph_osd_reply_head *reply_head; - struct ceph_osd_op *op; - u32 num_ops; u16 opcode; dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); @@ -1339,22 +1337,19 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_assert(!!obj_request->img_request ^ (obj_request->which == BAD_WHICH)); - reply_head = msg->front.iov_base; - obj_request->result = (s32) le32_to_cpu(reply_head->result); + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); - num_ops = le32_to_cpu(reply_head->num_ops); - WARN_ON(num_ops != 1); /* For now */ + WARN_ON(osd_req->r_num_ops != 1); /* For now */ /* * We support a 64-bit length, but ultimately it has to be * passed to blk_end_request(), which takes an unsigned int. */ - op = &reply_head->ops[0]; - obj_request->xferred = le64_to_cpu(op->extent.length); + obj_request->xferred = osd_req->r_reply_op_len[0]; rbd_assert(obj_request->xferred < (u64) UINT_MAX); - - opcode = le16_to_cpu(op->op); + opcode = osd_req->r_request_ops[0].op; switch (opcode) { case CEPH_OSD_OP_READ: rbd_osd_read_callback(obj_request); @@ -1719,6 +1714,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) more = blk_end_request(img_request->rq, result, xferred); which++; } + rbd_assert(more ^ (which == img_request->obj_request_count)); img_request->next_completion = which; out: diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fc613715af46..cfef3e01a9b3 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - int rc, bytes; + int rc = req->r_result; + int bytes = le32_to_cpu(msg->hdr.data_len); int i; - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - rc = le32_to_cpu(replyhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); - dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ @@ -553,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; struct ceph_inode_info *ci = ceph_inode(inode); unsigned wrote; struct page *page; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - __s32 rc = -EIO; - u64 bytes = 0; + int rc = req->r_result; + u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - if (rc >= 0) { /* * Assume we wrote the pages we originally sent. The @@ -740,8 +725,6 @@ retry: struct page *page; int want; u64 offset, len; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_op *op; long writeback_stat; next = 0; @@ -905,10 +888,8 @@ get_more_pages: /* revise final length, page count */ req->r_num_pages = locked_pages; - reqhead = req->r_request->front.iov_base; - op = (void *)(reqhead + 1); - op->extent.length = cpu_to_le64(len); - op->payload_len = cpu_to_le32(len); + req->r_request_ops[0].extent.length = cpu_to_le64(len); + req->r_request_ops[0].payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ad8899fc3157..1dd5d466b6f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -47,6 +47,9 @@ struct ceph_osd { struct list_head o_keepalive_item; }; + +#define CEPH_OSD_MAX_OP 10 + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ @@ -63,9 +66,23 @@ struct ceph_osd_request { struct ceph_connection *r_con_filling_msg; struct ceph_msg *r_request, *r_reply; - int r_result; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ + int r_num_ops; + + /* encoded message content */ + struct ceph_osd_op *r_request_ops; + /* these are updated on each send */ + __le32 *r_request_osdmap_epoch; + __le32 *r_request_flags; + __le64 *r_request_pool; + void *r_request_pgid; + __le32 *r_request_attempts; + struct ceph_eversion *r_request_reassert_version; + + int r_result; + int r_reply_op_len[CEPH_OSD_MAX_OP]; + s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index d784c8dfb09a..68c96a508ac2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -416,43 +416,5 @@ struct ceph_osd_op { __le32 payload_len; } __attribute__ ((packed)); -/* - * osd request message header. each request may include multiple - * ceph_osd_op object operations. - */ -struct ceph_osd_request_head { - __le32 client_inc; /* client incarnation */ - struct ceph_object_layout layout; /* pgid */ - __le32 osdmap_epoch; /* client's osdmap epoch */ - - __le32 flags; - - struct ceph_timespec mtime; /* for mutations only */ - struct ceph_eversion reassert_version; /* if we are replaying op */ - - __le32 object_len; /* length of object name */ - - __le64 snapid; /* snapid to read */ - __le64 snap_seq; /* writer's snap context */ - __le32 num_snaps; - - __le16 num_ops; - struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ -} __attribute__ ((packed)); - -struct ceph_osd_reply_head { - __le32 client_inc; /* client incarnation */ - __le32 flags; - struct ceph_object_layout layout; - __le32 osdmap_epoch; - struct ceph_eversion reassert_version; /* for replaying uncommitted */ - - __le32 result; /* result code */ - - __le32 object_len; /* length of object name */ - __le32 num_ops; - struct ceph_osd_op ops[0]; /* ops[], object */ -} __attribute__ ((packed)); - #endif diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index f4d4b27d6026..00d051f4894e 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -123,10 +123,7 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; + int opcode; int i; req = rb_entry(p, struct ceph_osd_request, r_node); @@ -135,13 +132,7 @@ static int osdc_show(struct seq_file *s, void *pp) req->r_osd ? req->r_osd->o_osd : -1, req->r_pgid.pool, req->r_pgid.seed); - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); + seq_printf(s, "%.*s", req->r_oid_len, req->r_oid); if (req->r_reassert_version.epoch) seq_printf(s, "\t%u'%llu", @@ -150,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp) else seq_printf(s, "\t"); - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); + for (i = 0; i < req->r_num_ops; i++) { + opcode = le16_to_cpu(req->r_request_ops[i].op); seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; } seq_printf(s, "\n"); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5584f0a08e28..d730dd4d8eb2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -146,15 +146,23 @@ EXPORT_SYMBOL(ceph_osdc_release_request); struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - unsigned int num_op, + unsigned int num_ops, bool use_mempool, gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - size_t msg_size = sizeof(struct ceph_osd_request_head); - - msg_size += num_op*sizeof(struct ceph_osd_op); + size_t msg_size; + + msg_size = 4 + 4 + 8 + 8 + 4+8; + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pg_t */ + msg_size += 4 + MAX_OBJ_NAME_SIZE; + msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ + msg_size += 4; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, gfp_flags); @@ -193,9 +201,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ - msg_size += MAX_OBJ_NAME_SIZE; - if (snapc) - msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else @@ -324,55 +329,80 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 len, unsigned int num_op, + u64 off, u64 len, unsigned int num_ops, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime) { struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; void *p; - size_t msg_size = sizeof(*head) + num_op*sizeof(*op); + size_t msg_size; int flags = req->r_flags; u64 data_len; int i; - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - - head = msg->front.iov_base; - head->snapid = cpu_to_le64(snap_id); - op = (void *)(head + 1); - p = (void *)(op + num_op); - + req->r_num_ops = num_ops; + req->r_snapid = snap_id; req->r_snapc = ceph_get_snap_context(snapc); - head->client_inc = cpu_to_le32(1); /* always, for now. */ - head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(&head->mtime, mtime); - BUG_ON(num_op > (unsigned int) ((u16) -1)); - head->num_ops = cpu_to_le16(num_op); + /* encode request */ + msg->hdr.version = cpu_to_le16(4); - /* fill in oid */ - head->object_len = cpu_to_le32(req->r_oid_len); + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); p += req->r_oid_len; + /* ops */ + ceph_encode_16(&p, num_ops); src_op = src_ops; - while (num_op--) - osd_req_encode_op(req, op++, src_op++); + req->r_request_ops = p; + for (i = 0; i < num_ops; i++, src_op++) { + osd_req_encode_op(req, p, src_op); + p += sizeof(struct ceph_osd_op); + } - if (snapc) { - head->snap_seq = cpu_to_le64(snapc->seq); - head->num_snaps = cpu_to_le32(snapc->num_snaps); + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); + if (req->r_snapc) { for (i = 0; i < snapc->num_snaps; i++) { - put_unaligned_le64(snapc->snaps[i], p); - p += sizeof(u64); + ceph_encode_64(&p, req->r_snapc->snaps[i]); } } + req->r_request_attempts = p; + p += 4; + data_len = req->r_trail.length; if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); @@ -385,6 +415,9 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, + num_ops); return; } EXPORT_SYMBOL(ceph_osdc_build_request); @@ -991,21 +1024,22 @@ out: static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { - struct ceph_osd_request_head *reqhead; - - dout("send_request %p tid %llu to osd%d flags %d\n", - req, req->r_tid, req->r_osd->o_osd, req->r_flags); - - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(req->r_snapid); - reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ - reqhead->reassert_version = req->r_reassert_version; + void *p; - reqhead->layout.ol_pgid.ps = cpu_to_le16(req->r_pgid.seed); - reqhead->layout.ol_pgid.pool = cpu_to_le32(req->r_pgid.pool); - reqhead->layout.ol_pgid.preferred = cpu_to_le16(-1); - reqhead->layout.ol_stripe_unit = 0; + dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n", + req, req->r_tid, req->r_osd->o_osd, req->r_flags, + (unsigned long long)req->r_pgid.pool, req->r_pgid.seed); + + /* fill in message content that changes each time we send it */ + put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch); + put_unaligned_le32(req->r_flags, req->r_request_flags); + put_unaligned_le64(req->r_pgid.pool, req->r_request_pool); + p = req->r_request_pgid; + ceph_encode_64(&p, req->r_pgid.pool); + ceph_encode_32(&p, req->r_pgid.seed); + put_unaligned_le64(1, req->r_request_attempts); /* FIXME */ + memcpy(req->r_request_reassert_version, &req->r_reassert_version, + sizeof(req->r_reassert_version)); req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); @@ -1105,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req) complete_all(&req->r_safe_completion); /* fsync waiter */ } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid) +{ + __u8 v; + + ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad); + v = ceph_decode_8(p); + if (v > 1) { + pr_warning("do not understand pg encoding %d > 1", v); + return -EINVAL; + } + pgid->pool = ceph_decode_64(p); + pgid->seed = ceph_decode_32(p); + *p += 4; + return 0; + +bad: + pr_warning("incomplete pg encoding"); + return -EINVAL; +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1112,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req) static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_connection *con) { - struct ceph_osd_reply_head *rhead = msg->front.iov_base; + void *p, *end; struct ceph_osd_request *req; u64 tid; - int numops, object_len, flags; + int object_len; + int numops, payload_len, flags; s32 result; + s32 retry_attempt; + struct ceph_pg pg; + int err; + u32 reassert_epoch; + u64 reassert_version; + u32 osdmap_epoch; + int i; tid = le64_to_cpu(msg->hdr.tid); - if (msg->front.iov_len < sizeof(*rhead)) - goto bad; - numops = le32_to_cpu(rhead->num_ops); - object_len = le32_to_cpu(rhead->object_len); - result = le32_to_cpu(rhead->result); - if (msg->front.iov_len != sizeof(*rhead) + object_len + - numops * sizeof(struct ceph_osd_op)) + dout("handle_reply %p tid %llu\n", msg, tid); + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_need(&p, end, 4, bad); + object_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, object_len, bad); + p += object_len; + + err = __decode_pgid(&p, end, &pg); + if (err) goto bad; - dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); + + ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad); + flags = ceph_decode_64(&p); + result = ceph_decode_32(&p); + reassert_epoch = ceph_decode_32(&p); + reassert_version = ceph_decode_64(&p); + osdmap_epoch = ceph_decode_32(&p); + /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); @@ -1137,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, return; } ceph_osdc_get_request(req); - flags = le32_to_cpu(rhead->flags); + + dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, + req, result); + + ceph_decode_need(&p, end, 4, bad); + numops = ceph_decode_32(&p); + if (numops > CEPH_OSD_MAX_OP) + goto bad_put; + if (numops != req->r_num_ops) + goto bad_put; + payload_len = 0; + ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + for (i = 0; i < numops; i++) { + struct ceph_osd_op *op = p; + int len; + + len = le32_to_cpu(op->payload_len); + req->r_reply_op_len[i] = len; + dout(" op %d has %d bytes\n", i, len); + payload_len += len; + p += sizeof(*op); + } + if (payload_len != le32_to_cpu(msg->hdr.data_len)) { + pr_warning("sum of op payload lens %d != data_len %d", + payload_len, le32_to_cpu(msg->hdr.data_len)); + goto bad_put; + } + + ceph_decode_need(&p, end, 4 + numops * 4, bad); + retry_attempt = ceph_decode_32(&p); + for (i = 0; i < numops; i++) + req->r_reply_op_result[i] = ceph_decode_32(&p); /* * if this connection filled our message, drop our reference now, to @@ -1152,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (!req->r_got_reply) { unsigned int bytes; - req->r_result = le32_to_cpu(rhead->result); + req->r_result = result; bytes = le32_to_cpu(msg->hdr.data_len); dout("handle_reply result %d bytes %d\n", req->r_result, bytes); @@ -1160,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_result = bytes; /* in case this is a write and we need to replay, */ - req->r_reassert_version = rhead->reassert_version; + req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch); + req->r_reassert_version.version = cpu_to_le64(reassert_version); req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { @@ -1195,10 +1301,11 @@ done: ceph_osdc_put_request(req); return; +bad_put: + ceph_osdc_put_request(req); bad: - pr_err("corrupt osd_op_reply got %d %d expected %d\n", - (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len), - (int)sizeof(*rhead)); + pr_err("corrupt osd_op_reply got %d %d\n", + (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); ceph_msg_dump(msg); } -- cgit v1.2.3 From 83ca14fdd35821554058e5fd4fa7b118ee504a33 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Feb 2013 10:39:09 -0800 Subject: libceph: add support for HASHPSPOOL pool flag The legacy behavior adds the pgid seed and pool together as the input for CRUSH. That is problematic because each pool's PGs end up mapping to the same OSDs: 1.5 == 2.4 == 3.3 == ... Instead, if the HASHPSPOOL flag is set, we has the ps and pool together and feed that into CRUSH. This ensures that two adjacent pools will map to an independent pseudorandom set of OSDs. Advertise our support for this via a protocol feature flag. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 4 +++- include/linux/ceph/osdmap.h | 2 ++ net/ceph/osdmap.c | 39 +++++++++++++++++++++++++------------- 3 files changed, 31 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index ab0a54286e0d..76554cecaab2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -34,6 +34,7 @@ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) #define CEPH_FEATURE_OSD_HBMSGS (1<<28) #define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. @@ -45,7 +46,8 @@ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ - CEPH_FEATURE_REPLY_CREATE_INODE) + CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 35985125f118..c819190d1642 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -23,6 +23,8 @@ struct ceph_pg { uint32_t seed; }; +#define CEPH_POOL_FLAG_HASHPSPOOL 1 + struct ceph_pg_pool_info { struct rb_node node; s64 id; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 378471644501..69bc4bf89e3e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1127,18 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned int poolid, ps, pps, t, r; + int r; + u32 pps; - poolid = pgid.pool; - ps = pgid.seed; - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return NULL; /* pg_temp? */ - t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); - pgid.seed = t; + pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, + pool->pgp_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1149,20 +1147,35 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, pool->type, pool->size); if (ruleno < 0) { - pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->crush_ruleset, pool->type, + pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, pool->size); return NULL; } - pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); - pps += poolid; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed sothat pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy ehavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ... + */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } r = crush_do_rule(osdmap->crush, ruleno, pps, osds, min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { - pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->crush_ruleset, + pr_err("error %d from crush rule: pool %lld ruleset %d type %d" + " size %d\n", r, pgid.pool, pool->crush_ruleset, pool->type, pool->size); return NULL; } -- cgit v1.2.3 From f00b4dad9d9eb001a04cf72e8351a2a1b9e99322 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 20 Dec 2012 14:14:23 +0100 Subject: dma-buf: implement vmap refcounting in the interface logic All drivers which implement this need to have some sort of refcount to allow concurrent vmap usage. Hence implement this in the dma-buf core. To protect against concurrent calls we need a lock, which potentially causes new funny locking inversions. But this shouldn't be a problem for exporters with statically allocated backing storage, and more dynamic drivers have decent issues already anyway. Inspired by some refactoring patches from Aaron Plattner, who implemented the same idea, but only for drm/prime drivers. v2: Check in dma_buf_release that no dangling vmaps are left. Suggested by Aaron Plattner. We might want to do similar checks for attachments, but that's for another patch. Also fix up ERR_PTR return for vmap. v3: Check whether the passed-in vmap address matches with the cached one for vunmap. Eventually we might want to remove that parameter - compared to the kmap functions there's no need for the vaddr for unmapping. Suggested by Chris Wilson. v4: Fix a brown-paper-bag bug spotted by Aaron Plattner. Cc: Aaron Plattner Reviewed-by: Aaron Plattner Tested-by: Aaron Plattner Reviewed-by: Rob Clark Signed-off-by: Daniel Vetter Signed-off-by: Sumit Semwal --- Documentation/dma-buf-sharing.txt | 6 +++++- drivers/base/dma-buf.c | 43 ++++++++++++++++++++++++++++++++++----- include/linux/dma-buf.h | 4 +++- 3 files changed, 46 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt index 0188903bc9e1..4966b1be42ac 100644 --- a/Documentation/dma-buf-sharing.txt +++ b/Documentation/dma-buf-sharing.txt @@ -302,7 +302,11 @@ Access to a dma_buf from the kernel context involves three steps: void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) The vmap call can fail if there is no vmap support in the exporter, or if it - runs out of vmalloc space. Fallback to kmap should be implemented. + runs out of vmalloc space. Fallback to kmap should be implemented. Note that + the dma-buf layer keeps a reference count for all vmap access and calls down + into the exporter's vmap function only when no vmapping exists, and only + unmaps it once. Protection against concurrent vmap/vunmap calls is provided + by taking the dma_buf->lock mutex. 3. Finish access diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index ff5b745c4705..394523807a6d 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data; + BUG_ON(dmabuf->vmapping_counter); + dmabuf->ops->release(dmabuf); kfree(dmabuf); return 0; @@ -481,12 +483,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); */ void *dma_buf_vmap(struct dma_buf *dmabuf) { + void *ptr; + if (WARN_ON(!dmabuf)) return NULL; - if (dmabuf->ops->vmap) - return dmabuf->ops->vmap(dmabuf); - return NULL; + if (!dmabuf->ops->vmap) + return NULL; + + mutex_lock(&dmabuf->lock); + if (dmabuf->vmapping_counter) { + dmabuf->vmapping_counter++; + BUG_ON(!dmabuf->vmap_ptr); + ptr = dmabuf->vmap_ptr; + goto out_unlock; + } + + BUG_ON(dmabuf->vmap_ptr); + + ptr = dmabuf->ops->vmap(dmabuf); + if (IS_ERR_OR_NULL(ptr)) + goto out_unlock; + + dmabuf->vmap_ptr = ptr; + dmabuf->vmapping_counter = 1; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ptr; } EXPORT_SYMBOL_GPL(dma_buf_vmap); @@ -500,7 +524,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) if (WARN_ON(!dmabuf)) return; - if (dmabuf->ops->vunmap) - dmabuf->ops->vunmap(dmabuf, vaddr); + BUG_ON(!dmabuf->vmap_ptr); + BUG_ON(dmabuf->vmapping_counter == 0); + BUG_ON(dmabuf->vmap_ptr != vaddr); + + mutex_lock(&dmabuf->lock); + if (--dmabuf->vmapping_counter == 0) { + if (dmabuf->ops->vunmap) + dmabuf->ops->vunmap(dmabuf, vaddr); + dmabuf->vmap_ptr = NULL; + } + mutex_unlock(&dmabuf->lock); } EXPORT_SYMBOL_GPL(dma_buf_vunmap); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3d754a394e92..9978b614a1aa 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -119,8 +119,10 @@ struct dma_buf { struct file *file; struct list_head attachments; const struct dma_buf_ops *ops; - /* mutex to serialize list manipulation and attach/detach */ + /* mutex to serialize list manipulation, attach/detach and vmap/unmap */ struct mutex lock; + unsigned vmapping_counter; + void *vmap_ptr; void *priv; }; -- cgit v1.2.3 From 864ef69b2d9b34e7c85baa9c5c601d5e735b208a Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Fri, 1 Feb 2013 18:22:52 +0000 Subject: dmaengine: add dma_request_slave_channel_compat() Adds a dma_request_slave_channel_compat() wrapper which accepts both the arguments from dma_request_channel() and dma_request_slave_channel(). Based on whether the driver is instantiated via DT, the appropriate channel request call will be made. This allows for a much cleaner migration of drivers to the dmaengine DT API as platforms continue to be mixed between those that boot using DT and those that do not. Suggested-by: Tony Lindgren Signed-off-by: Matt Porter Acked-by: Tony Lindgren Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index f5939999cb65..91ac8da25020 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1001,6 +1001,22 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); struct dma_chan *net_dma_find_channel(void); #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +#define dma_request_slave_channel_compat(mask, x, y, dev, name) \ + __dma_request_slave_channel_compat(&(mask), x, y, dev, name) + +static inline struct dma_chan +*__dma_request_slave_channel_compat(dma_cap_mask_t *mask, dma_filter_fn fn, + void *fn_param, struct device *dev, + char *name) +{ + struct dma_chan *chan; + + chan = dma_request_slave_channel(dev, name); + if (chan) + return chan; + + return __dma_request_channel(mask, fn, fn_param); +} /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From c5a51053cf3b499ddba60a89ab067ea05ad15840 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 27 Feb 2013 17:02:43 -0800 Subject: backlight: add new lp8788 backlight driver TI LP8788 PMU supports regulators, battery charger, RTC, ADC, backlight dri= ver and current sinks. This patch enables LP8788 backlight module. (Brightness mode) The brightness is controlled by PWM input or I2C register. All modes are supported in the driver. (Platform data) Configurable data can be defined in the platform side. name : backlight driver name. (default: "lcd-backlight") initial_brightness : initial value of backlight brightness bl_mode : brightness control by PWM or lp8788 register dim_mode : dimming mode selection full_scale : full scale current setting rise_time : brightness ramp up step time fall_time : brightness ramp down step time pwm_pol : PWM polarity setting when bl_mode is PWM based period_ns : platform specific PWM period value. unit is nano. The default values are set in case no platform data is defined. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Milo(Woogyom) Kim Cc: Richard Purdie Cc: Samuel Ortiz Cc: Thierry Reding Cc: "devendra.aaru" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/Kconfig | 6 + drivers/video/backlight/Makefile | 1 + drivers/video/backlight/lp8788_bl.c | 333 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/lp8788.h | 24 +-- 4 files changed, 345 insertions(+), 19 deletions(-) create mode 100644 drivers/video/backlight/lp8788_bl.c (limited to 'include/linux') diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index be27b551473f..db10d0120d2b 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -384,6 +384,12 @@ config BACKLIGHT_LP855X This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 backlight driver. +config BACKLIGHT_LP8788 + tristate "Backlight driver for TI LP8788 MFD" + depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788 + help + This supports TI LP8788 backlight driver. + config BACKLIGHT_OT200 tristate "Backlight driver for ot200 visualisation device" depends on BACKLIGHT_CLASS_DEVICE && CS5535_MFGPT && GPIO_CS5535 diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 4606c218e8e4..96c4d620c5ce 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_BACKLIGHT_LM3630) += lm3630_bl.o obj-$(CONFIG_BACKLIGHT_LM3639) += lm3639_bl.o obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o +obj-$(CONFIG_BACKLIGHT_LP8788) += lp8788_bl.o obj-$(CONFIG_BACKLIGHT_MAX8925) += max8925_bl.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_OT200) += ot200_bl.o diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c new file mode 100644 index 000000000000..4bb8b4f140cf --- /dev/null +++ b/drivers/video/backlight/lp8788_bl.c @@ -0,0 +1,333 @@ +/* + * TI LP8788 MFD - backlight driver + * + * Copyright 2012 Texas Instruments + * + * Author: Milo(Woogyom) Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Register address */ +#define LP8788_BL_CONFIG 0x96 +#define LP8788_BL_EN BIT(0) +#define LP8788_BL_PWM_INPUT_EN BIT(5) +#define LP8788_BL_FULLSCALE_SHIFT 2 +#define LP8788_BL_DIM_MODE_SHIFT 1 +#define LP8788_BL_PWM_POLARITY_SHIFT 6 + +#define LP8788_BL_BRIGHTNESS 0x97 + +#define LP8788_BL_RAMP 0x98 +#define LP8788_BL_RAMP_RISE_SHIFT 4 + +#define MAX_BRIGHTNESS 127 +#define DEFAULT_BL_NAME "lcd-backlight" + +struct lp8788_bl_config { + enum lp8788_bl_ctrl_mode bl_mode; + enum lp8788_bl_dim_mode dim_mode; + enum lp8788_bl_full_scale_current full_scale; + enum lp8788_bl_ramp_step rise_time; + enum lp8788_bl_ramp_step fall_time; + enum pwm_polarity pwm_pol; +}; + +struct lp8788_bl { + struct lp8788 *lp; + struct backlight_device *bl_dev; + struct lp8788_backlight_platform_data *pdata; + enum lp8788_bl_ctrl_mode mode; + struct pwm_device *pwm; +}; + +struct lp8788_bl_config default_bl_config = { + .bl_mode = LP8788_BL_REGISTER_ONLY, + .dim_mode = LP8788_DIM_EXPONENTIAL, + .full_scale = LP8788_FULLSCALE_1900uA, + .rise_time = LP8788_RAMP_8192us, + .fall_time = LP8788_RAMP_8192us, + .pwm_pol = PWM_POLARITY_NORMAL, +}; + +static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_COMB_PWM_BASED); +} + +static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_REGISTER_ONLY || + mode == LP8788_BL_COMB_REGISTER_BASED); +} + +static int lp8788_backlight_configure(struct lp8788_bl *bl) +{ + struct lp8788_backlight_platform_data *pdata = bl->pdata; + struct lp8788_bl_config *cfg = &default_bl_config; + int ret; + u8 val; + + /* + * Update chip configuration if platform data exists, + * otherwise use the default settings. + */ + if (pdata) { + cfg->bl_mode = pdata->bl_mode; + cfg->dim_mode = pdata->dim_mode; + cfg->full_scale = pdata->full_scale; + cfg->rise_time = pdata->rise_time; + cfg->fall_time = pdata->fall_time; + cfg->pwm_pol = pdata->pwm_pol; + } + + /* Brightness ramp up/down */ + val = (cfg->rise_time << LP8788_BL_RAMP_RISE_SHIFT) | cfg->fall_time; + ret = lp8788_write_byte(bl->lp, LP8788_BL_RAMP, val); + if (ret) + return ret; + + /* Fullscale current setting */ + val = (cfg->full_scale << LP8788_BL_FULLSCALE_SHIFT) | + (cfg->dim_mode << LP8788_BL_DIM_MODE_SHIFT); + + /* Brightness control mode */ + switch (cfg->bl_mode) { + case LP8788_BL_REGISTER_ONLY: + val |= LP8788_BL_EN; + break; + case LP8788_BL_COMB_PWM_BASED: + case LP8788_BL_COMB_REGISTER_BASED: + val |= LP8788_BL_EN | LP8788_BL_PWM_INPUT_EN | + (cfg->pwm_pol << LP8788_BL_PWM_POLARITY_SHIFT); + break; + default: + dev_err(bl->lp->dev, "invalid mode: %d\n", cfg->bl_mode); + return -EINVAL; + } + + bl->mode = cfg->bl_mode; + + return lp8788_write_byte(bl->lp, LP8788_BL_CONFIG, val); +} + +static void lp8788_pwm_ctrl(struct lp8788_bl *bl, int br, int max_br) +{ + unsigned int period; + unsigned int duty; + struct device *dev; + struct pwm_device *pwm; + + if (!bl->pdata) + return; + + period = bl->pdata->period_ns; + duty = br * period / max_br; + dev = bl->lp->dev; + + /* request PWM device with the consumer name */ + if (!bl->pwm) { + pwm = devm_pwm_get(dev, LP8788_DEV_BACKLIGHT); + if (IS_ERR(pwm)) { + dev_err(dev, "can not get PWM device\n"); + return; + } + + bl->pwm = pwm; + } + + pwm_config(bl->pwm, duty, period); + if (duty) + pwm_enable(bl->pwm); + else + pwm_disable(bl->pwm); +} + +static int lp8788_bl_update_status(struct backlight_device *bl_dev) +{ + struct lp8788_bl *bl = bl_get_data(bl_dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + + if (bl_dev->props.state & BL_CORE_SUSPENDED) + bl_dev->props.brightness = 0; + + if (is_brightness_ctrl_by_pwm(mode)) { + int brt = bl_dev->props.brightness; + int max = bl_dev->props.max_brightness; + + lp8788_pwm_ctrl(bl, brt, max); + } else if (is_brightness_ctrl_by_register(mode)) { + u8 brt = bl_dev->props.brightness; + + lp8788_write_byte(bl->lp, LP8788_BL_BRIGHTNESS, brt); + } + + return 0; +} + +static int lp8788_bl_get_brightness(struct backlight_device *bl_dev) +{ + return bl_dev->props.brightness; +} + +static const struct backlight_ops lp8788_bl_ops = { + .options = BL_CORE_SUSPENDRESUME, + .update_status = lp8788_bl_update_status, + .get_brightness = lp8788_bl_get_brightness, +}; + +static int lp8788_backlight_register(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev; + struct backlight_properties props; + struct lp8788_backlight_platform_data *pdata = bl->pdata; + int init_brt; + char *name; + + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = MAX_BRIGHTNESS; + + /* Initial brightness */ + if (pdata) + init_brt = min_t(int, pdata->initial_brightness, + props.max_brightness); + else + init_brt = 0; + + props.brightness = init_brt; + + /* Backlight device name */ + if (!pdata || !pdata->name) + name = DEFAULT_BL_NAME; + else + name = pdata->name; + + bl_dev = backlight_device_register(name, bl->lp->dev, bl, + &lp8788_bl_ops, &props); + if (IS_ERR(bl_dev)) + return PTR_ERR(bl_dev); + + bl->bl_dev = bl_dev; + + return 0; +} + +static void lp8788_backlight_unregister(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev = bl->bl_dev; + + if (bl_dev) + backlight_device_unregister(bl_dev); +} + +static ssize_t lp8788_get_bl_ctl_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lp8788_bl *bl = dev_get_drvdata(dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + char *strmode; + + if (is_brightness_ctrl_by_pwm(mode)) + strmode = "PWM based"; + else if (is_brightness_ctrl_by_register(mode)) + strmode = "Register based"; + else + strmode = "Invalid mode"; + + return scnprintf(buf, PAGE_SIZE, "%s\n", strmode); +} + +static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp8788_get_bl_ctl_mode, NULL); + +static struct attribute *lp8788_attributes[] = { + &dev_attr_bl_ctl_mode.attr, + NULL, +}; + +static const struct attribute_group lp8788_attr_group = { + .attrs = lp8788_attributes, +}; + +static int lp8788_backlight_probe(struct platform_device *pdev) +{ + struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent); + struct lp8788_bl *bl; + int ret; + + bl = devm_kzalloc(lp->dev, sizeof(struct lp8788_bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + + bl->lp = lp; + if (lp->pdata) + bl->pdata = lp->pdata->bl_pdata; + + platform_set_drvdata(pdev, bl); + + ret = lp8788_backlight_configure(bl); + if (ret) { + dev_err(lp->dev, "backlight config err: %d\n", ret); + goto err_dev; + } + + ret = lp8788_backlight_register(bl); + if (ret) { + dev_err(lp->dev, "register backlight err: %d\n", ret); + goto err_dev; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &lp8788_attr_group); + if (ret) { + dev_err(lp->dev, "register sysfs err: %d\n", ret); + goto err_sysfs; + } + + backlight_update_status(bl->bl_dev); + + return 0; + +err_sysfs: + lp8788_backlight_unregister(bl); +err_dev: + return ret; +} + +static int lp8788_backlight_remove(struct platform_device *pdev) +{ + struct lp8788_bl *bl = platform_get_drvdata(pdev); + struct backlight_device *bl_dev = bl->bl_dev; + + bl_dev->props.brightness = 0; + backlight_update_status(bl_dev); + sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group); + lp8788_backlight_unregister(bl); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver lp8788_bl_driver = { + .probe = lp8788_backlight_probe, + .remove = lp8788_backlight_remove, + .driver = { + .name = LP8788_DEV_BACKLIGHT, + .owner = THIS_MODULE, + }, +}; +module_platform_driver(lp8788_bl_driver); + +MODULE_DESCRIPTION("Texas Instruments LP8788 Backlight Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:lp8788-backlight"); diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 2a32b16f79cb..786bf6679a28 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -16,6 +16,7 @@ #include #include +#include #include #define LP8788_DEV_BUCK "lp8788-buck" @@ -124,11 +125,6 @@ enum lp8788_bl_ramp_step { LP8788_RAMP_65538us, }; -enum lp8788_bl_pwm_polarity { - LP8788_PWM_ACTIVE_HIGH, - LP8788_PWM_ACTIVE_LOW, -}; - enum lp8788_isink_scale { LP8788_ISINK_SCALE_100mA, LP8788_ISINK_SCALE_120mA, @@ -228,16 +224,6 @@ struct lp8788_charger_platform_data { enum lp8788_charger_event event); }; -/* - * struct lp8788_bl_pwm_data - * @pwm_set_intensity : set duty of pwm - * @pwm_get_intensity : get current duty of pwm - */ -struct lp8788_bl_pwm_data { - void (*pwm_set_intensity) (int brightness, int max_brightness); - int (*pwm_get_intensity) (int max_brightness); -}; - /* * struct lp8788_backlight_platform_data * @name : backlight driver name. (default: "lcd-backlight") @@ -248,8 +234,8 @@ struct lp8788_bl_pwm_data { * @rise_time : brightness ramp up step time * @fall_time : brightness ramp down step time * @pwm_pol : pwm polarity setting when bl_mode is pwm based - * @pwm_data : platform specific pwm generation functions - * only valid when bl_mode is pwm based + * @period_ns : platform specific pwm period value. unit is nano. + Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED */ struct lp8788_backlight_platform_data { char *name; @@ -259,8 +245,8 @@ struct lp8788_backlight_platform_data { enum lp8788_bl_full_scale_current full_scale; enum lp8788_bl_ramp_step rise_time; enum lp8788_bl_ramp_step fall_time; - enum lp8788_bl_pwm_polarity pwm_pol; - struct lp8788_bl_pwm_data pwm_data; + enum pwm_polarity pwm_pol; + unsigned int period_ns; }; /* -- cgit v1.2.3 From a321e91b6d73ed011ffceed384c40d2785cf723b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:56 -0800 Subject: lib/scatterlist: add simple page iterator Add an iterator to walk through a scatter list a page at a time starting at a specific page offset. As opposed to the mapping iterator this is meant to be small, performing well even in simple loops like collecting all pages on the scatterlist into an array or setting up an iommu table based on the pages' DMA address. Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Tested-by: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/scatterlist.h | 35 +++++++++++++++++++++++++++++++++++ lib/scatterlist.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4bd6c06eb28e..788a853aa7a7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -231,6 +231,41 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * sg page iterator + * + * Iterates over sg entries page-by-page. On each successful iteration, + * @piter->page points to the current page, @piter->sg to the sg holding this + * page and @piter->sg_pgoffset to the page's page offset within the sg. The + * iteration will stop either when a maximum number of sg entries was reached + * or a terminating sg (sg_last(sg) == true) was reached. + */ +struct sg_page_iter { + struct page *page; /* current page */ + struct scatterlist *sg; /* sg holding the page */ + unsigned int sg_pgoffset; /* page offset within the sg */ + + /* these are internal states, keep away */ + unsigned int __nents; /* remaining sg entries */ + int __pg_advance; /* nr pages to advance at the + * next step */ +}; + +bool __sg_page_iter_next(struct sg_page_iter *piter); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset); + +/** + * for_each_sg_page - iterate over the pages of the given sg list + * @sglist: sglist to iterate over + * @piter: page iterator to hold current page, sg, sg_pgoffset + * @nents: maximum number of sg entries to iterate over + * @pgoffset: starting page offset + */ +#define for_each_sg_page(sglist, piter, nents, pgoffset) \ + for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ + __sg_page_iter_next(piter);) /* * Mapping sg iterator diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7874b01e816e..a1d15647d7db 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -394,6 +394,44 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset) +{ + piter->__pg_advance = 0; + piter->__nents = nents; + + piter->page = NULL; + piter->sg = sglist; + piter->sg_pgoffset = pgoffset; +} +EXPORT_SYMBOL(__sg_page_iter_start); + +static int sg_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT; +} + +bool __sg_page_iter_next(struct sg_page_iter *piter) +{ + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset); + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started -- cgit v1.2.3 From 4225fc8555a992c7f91d174ef424384d6781e144 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:57 -0800 Subject: lib/scatterlist: use page iterator in the mapping iterator For better code reuse use the newly added page iterator to iterate through the pages. The offset, length within the page is still calculated by the mapping iterator as well as the actual mapping. Idea from Tejun Heo. Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Cc: James Hogan Cc: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/dw_mmc.c | 4 ++-- include/linux/scatterlist.h | 6 +++--- lib/scatterlist.c | 48 ++++++++++++++++++++++----------------------- 3 files changed, 28 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 60063ccb4c4b..98342213ed21 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1453,7 +1453,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; @@ -1508,7 +1508,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 788a853aa7a7..2d8bdaef9611 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -293,11 +293,11 @@ struct sg_mapping_iter { void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ + struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ - struct scatterlist *__sg; /* current entry */ - unsigned int __nents; /* nr of remaining entries */ - unsigned int __offset; /* offset within sg */ + unsigned int __offset; /* offset within page */ + unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a1d15647d7db..b83c144d731f 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -449,9 +449,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, { memset(miter, 0, sizeof(struct sg_mapping_iter)); - miter->__sg = sgl; - miter->__nents = nents; - miter->__offset = 0; + __sg_page_iter_start(&miter->piter, sgl, nents, 0); WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); miter->__flags = flags; } @@ -476,36 +474,35 @@ EXPORT_SYMBOL(sg_miter_start); */ bool sg_miter_next(struct sg_mapping_iter *miter) { - unsigned int off, len; - - /* check for end and drop resources from the last iteration */ - if (!miter->__nents) - return false; - sg_miter_stop(miter); - /* get to the next sg if necessary. __offset is adjusted by stop */ - while (miter->__offset == miter->__sg->length) { - if (--miter->__nents) { - miter->__sg = sg_next(miter->__sg); - miter->__offset = 0; - } else + /* + * Get to the next page if necessary. + * __remaining, __offset is adjusted by sg_miter_stop + */ + if (!miter->__remaining) { + struct scatterlist *sg; + unsigned long pgoffset; + + if (!__sg_page_iter_next(&miter->piter)) return false; - } - /* map the next page */ - off = miter->__sg->offset + miter->__offset; - len = miter->__sg->length - miter->__offset; + sg = miter->piter.sg; + pgoffset = miter->piter.sg_pgoffset; - miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT); - off &= ~PAGE_MASK; - miter->length = min_t(unsigned int, len, PAGE_SIZE - off); - miter->consumed = miter->length; + miter->__offset = pgoffset ? 0 : sg->offset; + miter->__remaining = sg->offset + sg->length - + (pgoffset << PAGE_SHIFT) - miter->__offset; + miter->__remaining = min_t(unsigned long, miter->__remaining, + PAGE_SIZE - miter->__offset); + } + miter->page = miter->piter.page; + miter->consumed = miter->length = miter->__remaining; if (miter->__flags & SG_MITER_ATOMIC) - miter->addr = kmap_atomic(miter->page) + off; + miter->addr = kmap_atomic(miter->page) + miter->__offset; else - miter->addr = kmap(miter->page) + off; + miter->addr = kmap(miter->page) + miter->__offset; return true; } @@ -532,6 +529,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) /* drop resources from the last iteration */ if (miter->addr) { miter->__offset += miter->consumed; + miter->__remaining -= miter->consumed; if (miter->__flags & SG_MITER_TO_SG) flush_kernel_dcache_page(miter->page); -- cgit v1.2.3 From e579d2c259be42b6f29458327e5153b22414b031 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 27 Feb 2013 17:03:15 -0800 Subject: coredump: remove redundant defines for dumpable states The existing SUID_DUMP_* defines duplicate the newer SUID_DUMPABLE_* defines introduced in 54b501992dd2 ("coredump: warn about unsafe suid_dumpable / core_pattern combo"). Remove the new ones, and use the prior values instead. Signed-off-by: Kees Cook Reported-by: Chen Gang Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 2 +- fs/exec.c | 10 +++++----- fs/proc/internal.h | 3 ++- include/linux/sched.h | 5 ----- kernel/sysctl.c | 2 +- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/coredump.c b/fs/coredump.c index 69baf903d3bd..c6479658d487 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/fs/exec.c b/fs/exec.c index 864c50df660a..a96a4885bbbf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) - set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); + set_dumpable(current->mm, SUID_DUMP_USER); else set_dumpable(current->mm, suid_dumpable); @@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt); void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case SUID_DUMPABLE_DISABLED: + case SUID_DUMP_DISABLE: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_ENABLED: + case SUID_DUMP_USER: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_SAFE: + case SUID_DUMP_ROOT: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; + return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } int get_dumpable(struct mm_struct *mm) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 252544c05207..85ff3a4598b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include #include +#include struct ctl_table_header; struct mempolicy; @@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task) if (mm) dumpable = get_dumpable(mm); task_unlock(task); - if (dumpable == SUID_DUMPABLE_ENABLED) + if (dumpable == SUID_DUMP_USER) return 1; return 0; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 6853bf947fde..d35d2b6ddbfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -/* get/set_dumpable() values */ -#define SUID_DUMPABLE_DISABLED 0 -#define SUID_DUMPABLE_ENABLED 1 -#define SUID_DUMPABLE_SAFE 2 - /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d8df00e69c14..d1b4ee67d2df 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2095,7 +2095,7 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, static void validate_coredump_safety(void) { #ifdef CONFIG_COREDUMP - if (suid_dumpable == SUID_DUMPABLE_SAFE && + if (suid_dumpable == SUID_DUMP_ROOT && core_pattern[0] != '/' && core_pattern[0] != '|') { printk(KERN_WARNING "Unsafe core_pattern used with "\ "suid_dumpable=2. Pipe handler or fully qualified "\ -- cgit v1.2.3 From 6aa9707099c4b25700940eb3d016f16c4434360d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Wed, 27 Feb 2013 17:03:18 -0800 Subject: lockdep: check that no locks held at freeze time We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. [akpm@linux-foundation.org: export debug_check_no_locks_held] Signed-off-by: Mandeep Singh Baines Cc: Ben Chan Cc: Oleg Nesterov Cc: Tejun Heo Cc: Rafael J. Wysocki Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debug_locks.h | 4 ++-- include/linux/freezer.h | 3 +++ kernel/exit.c | 2 +- kernel/lockdep.c | 17 ++++++++--------- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f766751..a975de1ff59f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e70df40d84f6..043a5cf8b5ba 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include #include #include #include @@ -48,6 +49,8 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; diff --git a/kernel/exit.c b/kernel/exit.c index 7dd20408707c..aaf97c122e8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -835,7 +835,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(tsk); + debug_check_no_locks_held(); /* * We can do this unlocked here. The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8a0efac4f99d..259db207b5d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(struct task_struct *curr) +static void print_held_locks_bug(void) { if (!debug_locks_off()) return; @@ -4097,22 +4097,21 @@ static void print_held_locks_bug(struct task_struct *curr) printk("\n"); printk("=====================================\n"); - printk("[ BUG: lock held at task exit time! ]\n"); + printk("[ BUG: %s/%d still has locks held! ]\n", + current->comm, task_pid_nr(current)); print_kernel_ident(); printk("-------------------------------------\n"); - printk("%s/%d is exiting with locks still held!\n", - curr->comm, task_pid_nr(curr)); - lockdep_print_held_locks(curr); - + lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(struct task_struct *task) +void debug_check_no_locks_held(void) { - if (unlikely(task->lockdep_depth > 0)) - print_held_locks_bug(task); + if (unlikely(current->lockdep_depth > 0)) + print_held_locks_bug(); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { -- cgit v1.2.3 From fe6e24ec90b753392c3f9ec1fbca196c4e88e511 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:50 -0800 Subject: idr: deprecate idr_remove_all() There was only one legitimate use of idr_remove_all() and a lot more of incorrect uses (or lack of it). Now that idr_destroy() implies idr_remove_all() and all the in-kernel users updated not to use it, there's no reason to keep it around. Mark it deprecated so that we can later unexport it. idr_remove_all() is made an inline function calling __idr_remove_all() to avoid triggering deprecated warning on EXPORT_SYMBOL(). Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 14 +++++++++++++- lib/idr.c | 10 +++------- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index e5eb125effe6..4cf042da3892 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -110,10 +110,22 @@ int idr_for_each(struct idr *idp, void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); -void idr_remove_all(struct idr *idp); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +void __idr_remove_all(struct idr *idp); /* don't use */ + +/** + * idr_remove_all - remove all ids from the given idr tree + * @idp: idr handle + * + * If you're trying to destroy @idp, calling idr_destroy() is enough. + * This is going away. Don't use. + */ +static inline void __deprecated idr_remove_all(struct idr *idp) +{ + __idr_remove_all(idp); +} /* * IDA - IDR based id allocator, use when translation from id to diff --git a/lib/idr.c b/lib/idr.c index b8602e0b30da..814c53ce0d41 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -433,11 +433,7 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); -/** - * idr_remove_all - remove all ids from the given idr tree - * @idp: idr handle - */ -void idr_remove_all(struct idr *idp) +void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -470,7 +466,7 @@ void idr_remove_all(struct idr *idp) } idp->layers = 0; } -EXPORT_SYMBOL(idr_remove_all); +EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree @@ -487,7 +483,7 @@ EXPORT_SYMBOL(idr_remove_all); */ void idr_destroy(struct idr *idp) { - idr_remove_all(idp); + __idr_remove_all(idp); while (idp->id_free_cnt) { struct idr_layer *p = get_from_free_list(idp); -- cgit v1.2.3 From 4106ecaf59b79efff3f9b466baf9e8c67e19ac5a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:51 -0800 Subject: idr: cosmetic updates to struct / initializer definitions * Tab align fields like a normal person. * Drop the unnecessary 0 inits from IDR_INIT(). This patch is purely cosmetic. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 4cf042da3892..8f4980db3524 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -49,28 +49,24 @@ #define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< Date: Wed, 27 Feb 2013 17:03:52 -0800 Subject: idr: relocate idr_for_each_entry() and reorganize id[r|a]_get_new() * Move idr_for_each_entry() definition next to other idr related definitions. * Make id[r|a]_get_new() inline wrappers of id[r|a]_get_new_above(). This changes the implementation of idr_get_new() but the new implementation is trivial. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 47 +++++++++++++++++++++++++++++++++++------------ lib/idr.c | 49 ------------------------------------------------- 2 files changed, 35 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 8f4980db3524..ff44bc83f3cb 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -99,7 +99,6 @@ struct idr { void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); @@ -109,6 +108,30 @@ void idr_remove(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the allocated handle + * + * Simple wrapper around idr_get_new_above() w/ @starting_id of zero. + */ +static inline int idr_get_new(struct idr *idp, void *ptr, int *id) +{ + return idr_get_new_above(idp, ptr, 0, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + void __idr_remove_all(struct idr *idp); /* don't use */ /** @@ -149,7 +172,6 @@ struct ida { int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); -int ida_get_new(struct ida *ida, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); @@ -158,17 +180,18 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); -void __init idr_init_cache(void); - /** - * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. */ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index 814c53ce0d41..282841b5a561 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -317,36 +317,6 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) } EXPORT_SYMBOL(idr_get_new_above); -/** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle - * - * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill - * IDR's preallocation and then retry the idr_get_new_above() call. - * - * If the idr is full idr_get_new_above() will return %-ENOSPC. - * - * @id returns a value in the range %0 ... %0x7fffffff - */ -int idr_get_new(struct idr *idp, void *ptr, int *id) -{ - int rv; - - rv = idr_get_new_above_int(idp, ptr, 0); - /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. - */ - if (rv < 0) - return _idr_rc_to_errno(rv); - *id = rv; - return 0; -} -EXPORT_SYMBOL(idr_get_new); - static void idr_remove_warning(int id) { printk(KERN_WARNING @@ -856,25 +826,6 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) } EXPORT_SYMBOL(ida_get_new_above); -/** - * ida_get_new - allocate new ID - * @ida: idr handle - * @p_id: pointer to the allocated handle - * - * Allocate new ID. It should be called with any required locks. - * - * If memory is required, it will return %-EAGAIN, you should unlock - * and go back to the idr_pre_get() call. If the idr is full, it will - * return %-ENOSPC. - * - * @p_id returns a value in the range %0 ... %0x7fffffff. - */ -int ida_get_new(struct ida *ida, int *p_id) -{ - return ida_get_new_above(ida, 0, p_id); -} -EXPORT_SYMBOL(ida_get_new); - /** * ida_remove - remove the given ID * @ida: ida handle -- cgit v1.2.3 From 12d1b4393e0d8df36b2646a5e512f0513fb532d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:53 -0800 Subject: idr: remove _idr_rc_to_errno() hack idr uses -1, IDR_NEED_TO_GROW and IDR_NOMORE_SPACE to communicate exception conditions internally. The return value is later translated to errno values using _idr_rc_to_errno(). This is confusing. Drop the custom ones and consistently use -EAGAIN for "tree needs to grow", -ENOMEM for "need more memory" and -ENOSPC for "ran out of ID space". Due to the weird memory preloading mechanism, [ra]_get_new*() return -EAGAIN on memory shortage, so we need to substitute -ENOMEM w/ -EAGAIN on those interface functions. They'll eventually be cleaned up and the translations will go away. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 6 ------ lib/idr.c | 35 +++++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index ff44bc83f3cb..837f152b1383 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -70,12 +70,6 @@ struct idr { } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) -/* Actions to be taken after a call to _idr_sub_alloc */ -#define IDR_NEED_TO_GROW -2 -#define IDR_NOMORE_SPACE -3 - -#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) - /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) diff --git a/lib/idr.c b/lib/idr.c index 282841b5a561..bde6eecb0e87 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -133,6 +133,21 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) } EXPORT_SYMBOL(idr_pre_get); +/** + * sub_alloc - try to allocate an id without growing the tree depth + * @idp: idr handle + * @starting_id: id to start search at + * @id: pointer to the allocated handle + * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer + * + * Allocate an id in range [@starting_id, INT_MAX] from @idp without + * growing its depth. Returns + * + * the allocated id >= 0 if successful, + * -EAGAIN if the tree needs to grow for allocation to succeed, + * -ENOSPC if the id space is exhausted, + * -ENOMEM if more idr_layers need to be allocated. + */ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) { int n, m, sh; @@ -161,7 +176,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) /* if already at the top layer, we need to grow */ if (id >= 1 << (idp->layers * IDR_BITS)) { *starting_id = id; - return IDR_NEED_TO_GROW; + return -EAGAIN; } p = pa[l]; BUG_ON(!p); @@ -180,7 +195,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = ((id >> sh) ^ n ^ m) << sh; } if ((id >= MAX_IDR_BIT) || (id < 0)) - return IDR_NOMORE_SPACE; + return -ENOSPC; if (l == 0) break; /* @@ -189,7 +204,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) if (!p->ary[m]) { new = get_from_free_list(idp); if (!new) - return -1; + return -ENOMEM; new->layer = l-1; rcu_assign_pointer(p->ary[m], new); p->count++; @@ -215,7 +230,7 @@ build_up: layers = idp->layers; if (unlikely(!p)) { if (!(p = get_from_free_list(idp))) - return -1; + return -ENOMEM; p->layer = 0; layers = 1; } @@ -246,7 +261,7 @@ build_up: __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); - return -1; + return -ENOMEM; } new->ary[0] = p; new->count = 1; @@ -258,7 +273,7 @@ build_up: rcu_assign_pointer(idp->top, p); idp->layers = layers; v = sub_alloc(idp, &id, pa); - if (v == IDR_NEED_TO_GROW) + if (v == -EAGAIN) goto build_up; return(v); } @@ -306,12 +321,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) int rv; rv = idr_get_new_above_int(idp, ptr, starting_id); - /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. - */ if (rv < 0) - return _idr_rc_to_errno(rv); + return rv == -ENOMEM ? -EAGAIN : rv; *id = rv; return 0; } @@ -766,7 +777,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) /* get vacant slot */ t = idr_get_empty_slot(&ida->idr, idr_id, pa); if (t < 0) - return _idr_rc_to_errno(t); + return t == -ENOMEM ? -EAGAIN : t; if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) return -ENOSPC; -- cgit v1.2.3 From d5c7409f79e14db49d00785692334657592c07ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:55 -0800 Subject: idr: implement idr_preload[_end]() and idr_alloc() The current idr interface is very cumbersome. * For all allocations, two function calls - idr_pre_get() and idr_get_new*() - should be made. * idr_pre_get() doesn't guarantee that the following idr_get_new*() will not fail from memory shortage. If idr_get_new*() returns -EAGAIN, the caller is expected to retry pre_get and allocation. * idr_get_new*() can't enforce upper limit. Upper limit can only be enforced by allocating and then freeing if above limit. * idr_layer buffer is unnecessarily per-idr. Each idr ends up keeping around MAX_IDR_FREE idr_layers. The memory consumed per idr is under two pages but it makes it difficult to make idr_layer larger. This patch implements the following new set of allocation functions. * idr_preload[_end]() - Similar to radix preload but doesn't fail. The first idr_alloc() inside preload section can be treated as if it were called with @gfp_mask used for idr_preload(). * idr_alloc() - Allocate an ID w/ lower and upper limits. Takes @gfp_flags and can be used w/o preloading. When used inside preloaded section, the allocation mask of preloading can be assumed. If idr_alloc() can be called from a context which allows sufficiently relaxed @gfp_mask, it can be used by itself. If, for example, idr_alloc() is called inside spinlock protected region, preloading can be used like the following. idr_preload(GFP_KERNEL); spin_lock(lock); id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); spin_unlock(lock); idr_preload_end(); if (id < 0) error; which is much simpler and less error-prone than idr_pre_get and idr_get_new*() loop. The new interface uses per-pcu idr_layer buffer and thus the number of idr's in the system doesn't affect the amount of memory used for preloading. idr_layer_alloc() is introduced to handle idr_layer allocations for both old and new ID allocation paths. This is a bit hairy now but the new interface is expected to replace the old and the internal implementation eventually will become simpler. Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 14 +++++ lib/idr.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 180 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 837f152b1383..6dcf133f208a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -94,14 +94,28 @@ struct idr { void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +void idr_preload(gfp_t gfp_mask); +int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); +void idr_free(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. + */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + /** * idr_get_new - allocate new idr entry * @idp: idr handle diff --git a/lib/idr.c b/lib/idr.c index b13aae5bdc81..2d016f5c410e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -35,8 +35,12 @@ #include #include #include +#include +#include static struct kmem_cache *idr_layer_cache; +static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); +static DEFINE_PER_CPU(int, idr_preload_cnt); static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) @@ -54,6 +58,50 @@ static struct idr_layer *get_from_free_list(struct idr *idp) return(p); } +/** + * idr_layer_alloc - allocate a new idr_layer + * @gfp_mask: allocation mask + * @layer_idr: optional idr to allocate from + * + * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch + * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch + * an idr_layer from @idr->id_free. + * + * @layer_idr is to maintain backward compatibility with the old alloc + * interface - idr_pre_get() and idr_get_new*() - and will be removed + * together with per-pool preload buffer. + */ +static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) +{ + struct idr_layer *new; + + /* this is the old path, bypass to get_from_free_list() */ + if (layer_idr) + return get_from_free_list(layer_idr); + + /* try to allocate directly from kmem_cache */ + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + if (new) + return new; + + /* + * Try to fetch one from the per-cpu preload buffer if in process + * context. See idr_preload() for details. + */ + if (in_interrupt()) + return NULL; + + preempt_disable(); + new = __this_cpu_read(idr_preload_head); + if (new) { + __this_cpu_write(idr_preload_head, new->ary[0]); + __this_cpu_dec(idr_preload_cnt); + new->ary[0] = NULL; + } + preempt_enable(); + return new; +} + static void idr_layer_rcu_free(struct rcu_head *head) { struct idr_layer *layer; @@ -139,6 +187,8 @@ EXPORT_SYMBOL(idr_pre_get); * @starting_id: id to start search at * @id: pointer to the allocated handle * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer + * @gfp_mask: allocation mask for idr_layer_alloc() + * @layer_idr: optional idr passed to idr_layer_alloc() * * Allocate an id in range [@starting_id, INT_MAX] from @idp without * growing its depth. Returns @@ -148,7 +198,8 @@ EXPORT_SYMBOL(idr_pre_get); * -ENOSPC if the id space is exhausted, * -ENOMEM if more idr_layers need to be allocated. */ -static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) +static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, + gfp_t gfp_mask, struct idr *layer_idr) { int n, m, sh; struct idr_layer *p, *new; @@ -202,7 +253,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) * Create the layer below if it is missing. */ if (!p->ary[m]) { - new = get_from_free_list(idp); + new = idr_layer_alloc(gfp_mask, layer_idr); if (!new) return -ENOMEM; new->layer = l-1; @@ -218,7 +269,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) } static int idr_get_empty_slot(struct idr *idp, int starting_id, - struct idr_layer **pa) + struct idr_layer **pa, gfp_t gfp_mask, + struct idr *layer_idr) { struct idr_layer *p, *new; int layers, v, id; @@ -229,7 +281,7 @@ build_up: p = idp->top; layers = idp->layers; if (unlikely(!p)) { - if (!(p = get_from_free_list(idp))) + if (!(p = idr_layer_alloc(gfp_mask, layer_idr))) return -ENOMEM; p->layer = 0; layers = 1; @@ -248,7 +300,7 @@ build_up: p->layer++; continue; } - if (!(new = get_from_free_list(idp))) { + if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { /* * The allocation failed. If we built part of * the structure tear it down. @@ -272,7 +324,7 @@ build_up: } rcu_assign_pointer(idp->top, p); idp->layers = layers; - v = sub_alloc(idp, &id, pa); + v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr); if (v == -EAGAIN) goto build_up; return(v); @@ -312,7 +364,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) struct idr_layer *pa[MAX_IDR_LEVEL]; int rv; - rv = idr_get_empty_slot(idp, starting_id, pa); + rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); if (rv < 0) return rv == -ENOMEM ? -EAGAIN : rv; @@ -322,6 +374,112 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) } EXPORT_SYMBOL(idr_get_new_above); +/** + * idr_preload - preload for idr_alloc() + * @gfp_mask: allocation mask to use for preloading + * + * Preload per-cpu layer buffer for idr_alloc(). Can only be used from + * process context and each idr_preload() invocation should be matched with + * idr_preload_end(). Note that preemption is disabled while preloaded. + * + * The first idr_alloc() in the preloaded section can be treated as if it + * were invoked with @gfp_mask used for preloading. This allows using more + * permissive allocation masks for idrs protected by spinlocks. + * + * For example, if idr_alloc() below fails, the failure can be treated as + * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT. + * + * idr_preload(GFP_KERNEL); + * spin_lock(lock); + * + * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); + * + * spin_unlock(lock); + * idr_preload_end(); + * if (id < 0) + * error; + */ +void idr_preload(gfp_t gfp_mask) +{ + /* + * Consuming preload buffer from non-process context breaks preload + * allocation guarantee. Disallow usage from those contexts. + */ + WARN_ON_ONCE(in_interrupt()); + might_sleep_if(gfp_mask & __GFP_WAIT); + + preempt_disable(); + + /* + * idr_alloc() is likely to succeed w/o full idr_layer buffer and + * return value from idr_alloc() needs to be checked for failure + * anyway. Silently give up if allocation fails. The caller can + * treat failures from idr_alloc() as if idr_alloc() were called + * with @gfp_mask which should be enough. + */ + while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { + struct idr_layer *new; + + preempt_enable(); + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + preempt_disable(); + if (!new) + break; + + /* link the new one to per-cpu preload list */ + new->ary[0] = __this_cpu_read(idr_preload_head); + __this_cpu_write(idr_preload_head, new); + __this_cpu_inc(idr_preload_cnt); + } +} +EXPORT_SYMBOL(idr_preload); + +/** + * idr_alloc - allocate new idr entry + * @idr: the (initialized) idr + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive, <= 0 for max) + * @gfp_mask: memory allocation flags + * + * Allocate an id in [start, end) and associate it with @ptr. If no ID is + * available in the specified range, returns -ENOSPC. On memory allocation + * failure, returns -ENOMEM. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * The user is responsible for exclusively synchronizing all operations + * which may modify @idr. However, read-only accesses such as idr_find() + * or iteration can be performed under RCU read lock provided the user + * destroys @ptr in RCU-safe way after removal from idr. + */ +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) +{ + int max = end > 0 ? end - 1 : INT_MAX; /* inclusive upper limit */ + struct idr_layer *pa[MAX_IDR_LEVEL]; + int id; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + /* sanity checks */ + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + if (unlikely(max < start)) + return -ENOSPC; + + /* allocate id */ + id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); + if (unlikely(id < 0)) + return id; + if (unlikely(id > max)) + return -ENOSPC; + + idr_fill_slot(ptr, id, pa); + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); + static void idr_remove_warning(int id) { printk(KERN_WARNING @@ -769,7 +927,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) restart: /* get vacant slot */ - t = idr_get_empty_slot(&ida->idr, idr_id, pa); + t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); if (t < 0) return t == -ENOMEM ? -EAGAIN : t; -- cgit v1.2.3 From e8c8d1bc063bc88cfa1356266027b5075d3a82d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:04 -0800 Subject: idr: remove MAX_IDR_MASK and move left MAX_IDR_* into idr.c MAX_IDR_MASK is another weirdness in the idr interface. As idr covers whole positive integer range, it's defined as 0x7fffffff or INT_MAX. Its usage in idr_find(), idr_replace() and idr_remove() is bizarre. They basically mask off the sign bit and operate on the rest, so if the caller, by accident, passes in a negative number, the sign bit will be masked off and the remaining part will be used as if that was the input, which is worse than crashing. The constant is visible in idr.h and there are several users in the kernel. * drivers/i2c/i2c-core.c:i2c_add_numbered_adapter() Basically used to test if adap->nr is a negative number which isn't -1 and returns -EINVAL if so. idr_alloc() already has negative @start checking (w/ WARN_ON_ONCE), so this can go away. * drivers/infiniband/core/cm.c:cm_alloc_id() drivers/infiniband/hw/mlx4/cm.c:id_map_alloc() Used to wrap cyclic @start. Can be replaced with max(next, 0). Note that this type of cyclic allocation using idr is buggy. These are prone to spurious -ENOSPC failure after the first wraparound. * fs/super.c:get_anon_bdev() The ID allocated from ida is masked off before being tested whether it's inside valid range. ida allocated ID can never be a negative number and the masking is unnecessary. Update idr_*() functions to fail with -EINVAL when negative @id is specified and update other MAX_IDR_MASK users as described above. This leaves MAX_IDR_MASK without any user, remove it and relocate other MAX_IDR_* constants to lib/idr.c. Signed-off-by: Tejun Heo Cc: Jean Delvare Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: "Marciniszyn, Mike" Cc: Jack Morgenstein Cc: Or Gerlitz Cc: Al Viro Acked-by: Wolfram Sang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/i2c/i2c-core.c | 2 -- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/hw/mlx4/cm.c | 2 +- fs/super.c | 2 +- include/linux/idr.h | 10 ---------- lib/idr.c | 24 +++++++++++++++++------- 6 files changed, 20 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 8d1f644a7fdc..991d38daa87d 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -979,8 +979,6 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) if (adap->nr == -1) /* -1 means dynamically assign bus id */ return i2c_add_adapter(adap); - if (adap->nr & ~MAX_IDR_MASK) - return -EINVAL; mutex_lock(&core_lock); id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 98281fe5ea4b..784b97cb05b0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -390,7 +390,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); if (id >= 0) - next_id = ((unsigned) id + 1) & MAX_IDR_MASK; + next_id = max(id + 1, 0); spin_unlock_irqrestore(&cm.lock, flags); idr_preload_end(); diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 80e59ed864b3..e0d79b2395e4 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -225,7 +225,7 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT); if (ret >= 0) { - next_id = ((unsigned)ret + 1) & MAX_IDR_MASK; + next_id = max(ret + 1, 0); ent->pv_cm_id = (u32)ret; sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); diff --git a/fs/super.c b/fs/super.c index 12f123712161..df6c2f4c6b59 100644 --- a/fs/super.c +++ b/fs/super.c @@ -842,7 +842,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { + if (dev == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) diff --git a/include/linux/idr.h b/include/linux/idr.h index 6dcf133f208a..99b0ce533f0e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -38,16 +38,6 @@ #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -#define MAX_IDR_SHIFT (sizeof(int)*8 - 1) -#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) -#define MAX_IDR_MASK (MAX_IDR_BIT - 1) - -/* Leave the possibility of an incomplete final layer */ -#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) - -/* Number of id_layer structs to leave in free list */ -#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) - struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< #include +#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) +#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) + +/* Leave the possibility of an incomplete final layer */ +#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) + +/* Number of id_layer structs to leave in free list */ +#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) + static struct kmem_cache *idr_layer_cache; static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); static DEFINE_PER_CPU(int, idr_preload_cnt); @@ -542,8 +551,8 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; - /* Mask off upper bits we don't use for the search. */ - id &= MAX_IDR_MASK; + if (WARN_ON_ONCE(id < 0)) + return; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && @@ -650,14 +659,14 @@ void *idr_find(struct idr *idp, int id) int n; struct idr_layer *p; + if (WARN_ON_ONCE(id < 0)) + return NULL; + p = rcu_dereference_raw(idp->top); if (!p) return NULL; n = (p->layer+1) * IDR_BITS; - /* Mask off upper bits we don't use for the search. */ - id &= MAX_IDR_MASK; - if (id > idr_max(p->layer + 1)) return NULL; BUG_ON(n == 0); @@ -799,14 +808,15 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; + if (WARN_ON_ONCE(id < 0)) + return ERR_PTR(-EINVAL); + p = idp->top; if (!p) return ERR_PTR(-EINVAL); n = (p->layer+1) * IDR_BITS; - id &= MAX_IDR_MASK; - if (id >= (1 << n)) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 1d9b2e1e663719d406e3a770979a19ba4233bba0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:05 -0800 Subject: idr: remove length restriction from idr_layer->bitmap Currently, idr->bitmap is declared as an unsigned long which restricts the number of bits an idr_layer can contain. All bitops can handle arbitrary positive integer bit number and there's no reason for this restriction. Declare idr_layer->bitmap using DECLARE_BITMAP() instead of a single unsigned long. * idr_layer->bitmap is now an array. '&' dropped from params to bitops. * Replaced "== IDR_FULL" tests with bitmap_full() and removed IDR_FULL. * Replaced find_next_bit() on ~bitmap with find_next_zero_bit(). * Replaced "bitmap = 0" with bitmap_clear(). This patch doesn't (or at least shouldn't) introduce any behavior changes. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 12 +----------- lib/idr.c | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 99b0ce533f0e..63aa542da49b 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -19,18 +19,8 @@ #if BITS_PER_LONG == 32 # define IDR_BITS 5 -# define IDR_FULL 0xfffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (5 bits * 7 levels = 35 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 30) #elif BITS_PER_LONG == 64 # define IDR_BITS 6 -# define IDR_FULL 0xfffffffffffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (6 bits * 6 levels = 36 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 62) #else # error "BITS_PER_LONG is not 32 or 64" #endif @@ -39,7 +29,7 @@ #define IDR_MASK ((1 << IDR_BITS)-1) struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1<bitmap); + __set_bit(id & IDR_MASK, p->bitmap); /* * If this layer is full mark the bit in the layer above to * show that this part of the radix tree is full. This may * complete the layer above and require walking up the radix * tree. */ - while (p->bitmap == IDR_FULL) { + while (bitmap_full(p->bitmap, IDR_SIZE)) { if (!(p = pa[++l])) break; id = id >> IDR_BITS; - __set_bit((id & IDR_MASK), &p->bitmap); + __set_bit((id & IDR_MASK), p->bitmap); } } @@ -221,7 +221,6 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, int n, m, sh; struct idr_layer *p, *new; int l, id, oid; - unsigned long bm; id = *starting_id; restart: @@ -233,8 +232,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, * We run around this while until we reach the leaf node... */ n = (id >> (IDR_BITS*l)) & IDR_MASK; - bm = ~p->bitmap; - m = find_next_bit(&bm, IDR_SIZE, n); + m = find_next_zero_bit(p->bitmap, IDR_SIZE, n); if (m == IDR_SIZE) { /* no space available go back to previous layer. */ l++; @@ -326,7 +324,8 @@ build_up: for (new = p; p && p != idp->top; new = p) { p = p->ary[0]; new->ary[0] = NULL; - new->bitmap = new->count = 0; + new->count = 0; + bitmap_clear(new->bitmap, 0, IDR_SIZE); __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); @@ -335,8 +334,8 @@ build_up: new->ary[0] = p; new->count = 1; new->layer = layers-1; - if (p->bitmap == IDR_FULL) - __set_bit(0, &new->bitmap); + if (bitmap_full(p->bitmap, IDR_SIZE)) + __set_bit(0, new->bitmap); p = new; } rcu_assign_pointer(idp->top, p); @@ -517,14 +516,14 @@ static void sub_remove(struct idr *idp, int shift, int id) while ((shift > 0) && p) { n = (id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); *++paa = &p->ary[n]; p = p->ary[n]; shift -= IDR_BITS; } n = id & IDR_MASK; - if (likely(p != NULL && test_bit(n, &p->bitmap))){ - __clear_bit(n, &p->bitmap); + if (likely(p != NULL && test_bit(n, p->bitmap))) { + __clear_bit(n, p->bitmap); rcu_assign_pointer(p->ary[n], NULL); to_free = NULL; while(*paa && ! --((**paa)->count)){ @@ -567,7 +566,8 @@ void idr_remove(struct idr *idp, int id) p = idp->top->ary[0]; rcu_assign_pointer(idp->top, p); --idp->layers; - to_free->bitmap = to_free->count = 0; + to_free->count = 0; + bitmap_clear(to_free->bitmap, 0, IDR_SIZE); free_layer(to_free); } while (idp->id_free_cnt >= MAX_IDR_FREE) { @@ -827,7 +827,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } n = id & IDR_MASK; - if (unlikely(p == NULL || !test_bit(n, &p->bitmap))) + if (unlikely(p == NULL || !test_bit(n, p->bitmap))) return ERR_PTR(-ENOENT); old_p = p->ary[n]; @@ -1024,7 +1024,7 @@ void ida_remove(struct ida *ida, int id) /* clear full bits while looking up the leaf idr_layer */ while ((shift > 0) && p) { n = (idr_id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); p = p->ary[n]; shift -= IDR_BITS; } @@ -1033,7 +1033,7 @@ void ida_remove(struct ida *ida, int id) goto err; n = idr_id & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); bitmap = (void *)p->ary[n]; if (!test_bit(offset, bitmap->bitmap)) @@ -1042,7 +1042,7 @@ void ida_remove(struct ida *ida, int id) /* update bitmap and remove it if empty */ __clear_bit(offset, bitmap->bitmap); if (--bitmap->nr_busy == 0) { - __set_bit(n, &p->bitmap); /* to please idr_remove() */ + __set_bit(n, p->bitmap); /* to please idr_remove() */ idr_remove(&ida->idr, idr_id); free_bitmap(ida, bitmap); } -- cgit v1.2.3 From 050a6b47d98e2bcea909c1129111e721668aaa2c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:06 -0800 Subject: idr: make idr_layer larger With recent preloading changes, idr no longer keeps full layer cache per each idr instance (used to be ~6.5k per idr on 64bit) and the previous patch removed restriction on the bitmap size. Both now allow us to have larger layers. Increase IDR_BITS to 8 regardless of BITS_PER_LONG. Each layer is slightly larger than 2k on 64bit and 1k on 32bit and carries 256 entries. The size isn't too large, especially compared to what we used to waste on per-idr caches, and 256 entries should be able to serve most use cases with single layer. The max tree depth is 4 which is much better than the previous 6 on 64bit and 7 on 32bit. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 63aa542da49b..43b87b1c77a3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -17,14 +17,13 @@ #include #include -#if BITS_PER_LONG == 32 -# define IDR_BITS 5 -#elif BITS_PER_LONG == 64 -# define IDR_BITS 6 -#else -# error "BITS_PER_LONG is not 32 or 64" -#endif - +/* + * We want shallower trees and thus more bits covered at each layer. 8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. + */ +#define IDR_BITS 8 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -- cgit v1.2.3 From 54616283c2948812a44240858ced610e7cacbde1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:07 -0800 Subject: idr: add idr_layer->prefix Add a field which carries the prefix of ID the idr_layer covers. This will be used to implement lookup hint. This patch doesn't make use of the new field and doesn't introduce any behavior difference. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 1 + lib/idr.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 43b87b1c77a3..7b1c5c6f9a06 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -28,6 +28,7 @@ #define IDR_MASK ((1 << IDR_BITS)-1) struct idr_layer { + int prefix; /* the ID prefix of this idr_layer */ DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1<layer = l-1; + new->prefix = id & idr_layer_prefix_mask(new->layer); rcu_assign_pointer(p->ary[m], new); p->count++; } @@ -313,6 +324,7 @@ build_up: * upwards. */ p->layer++; + WARN_ON_ONCE(p->prefix); continue; } if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { @@ -334,6 +346,7 @@ build_up: new->ary[0] = p; new->count = 1; new->layer = layers-1; + new->prefix = id & idr_layer_prefix_mask(new->layer); if (bitmap_full(p->bitmap, IDR_SIZE)) __set_bit(0, new->bitmap); p = new; -- cgit v1.2.3 From 0ffc2a9c8072969253a20821c2c733a2cbb4c7c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:08 -0800 Subject: idr: implement lookup hint While idr lookup isn't a particularly heavy operation, it still is too substantial to use in hot paths without worrying about the performance implications. With recent changes, each idr_layer covers 256 slots which should be enough to cover most use cases with single idr_layer making lookup hint very attractive. This patch adds idr->hint which points to the idr_layer which allocated an ID most recently and the fast path lookup becomes if (look up target's prefix matches that of the hinted layer) return hint->ary[ID's offset in the leaf layer]; which can be inlined. idr->hint is set to the leaf node on idr_fill_slot() and cleared from free_layer(). [andriy.shevchenko@linux.intel.com: always do slow path when hint is uninitialized] Signed-off-by: Tejun Heo Cc: Kirill A. Shutemov Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 25 ++++++++++++++++++++++++- lib/idr.c | 38 ++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 7b1c5c6f9a06..a6f38b5c34e4 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -37,6 +37,7 @@ struct idr_layer { }; struct idr { + struct idr_layer __rcu *hint; /* the last layer allocated from */ struct idr_layer __rcu *top; struct idr_layer *id_free; int layers; /* only valid w/o concurrent changes */ @@ -71,7 +72,7 @@ struct idr { * This is what we export. */ -void *idr_find(struct idr *idp, int id); +void *idr_find_slowpath(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_preload(gfp_t gfp_mask); @@ -96,6 +97,28 @@ static inline void idr_preload_end(void) preempt_enable(); } +/** + * idr_find - return pointer for given id + * @idp: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + /** * idr_get_new - allocate new idr entry * @idp: idr handle diff --git a/lib/idr.c b/lib/idr.c index 5cd602936645..1a30272066c6 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -137,8 +137,10 @@ static void idr_layer_rcu_free(struct rcu_head *head) kmem_cache_free(idr_layer_cache, layer); } -static inline void free_layer(struct idr_layer *p) +static inline void free_layer(struct idr *idr, struct idr_layer *p) { + if (idr->hint && idr->hint == p) + RCU_INIT_POINTER(idr->hint, NULL); call_rcu(&p->rcu_head, idr_layer_rcu_free); } @@ -363,8 +365,12 @@ build_up: * @id and @pa are from a successful allocation from idr_get_empty_slot(). * Install the user pointer @ptr and mark the slot full. */ -static void idr_fill_slot(void *ptr, int id, struct idr_layer **pa) +static void idr_fill_slot(struct idr *idr, void *ptr, int id, + struct idr_layer **pa) { + /* update hint used for lookup, cleared from free_layer() */ + rcu_assign_pointer(idr->hint, pa[0]); + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); pa[0]->count++; idr_mark_full(pa, id); @@ -397,7 +403,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) if (rv < 0) return rv == -ENOMEM ? -EAGAIN : rv; - idr_fill_slot(ptr, rv, pa); + idr_fill_slot(idp, ptr, rv, pa); *id = rv; return 0; } @@ -504,7 +510,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) if (unlikely(id > max)) return -ENOSPC; - idr_fill_slot(ptr, id, pa); + idr_fill_slot(idr, ptr, id, pa); return id; } EXPORT_SYMBOL_GPL(idr_alloc); @@ -541,14 +547,14 @@ static void sub_remove(struct idr *idp, int shift, int id) to_free = NULL; while(*paa && ! --((**paa)->count)){ if (to_free) - free_layer(to_free); + free_layer(idp, to_free); to_free = **paa; **paa-- = NULL; } if (!*paa) idp->layers = 0; if (to_free) - free_layer(to_free); + free_layer(idp, to_free); } else idr_remove_warning(id); } @@ -581,7 +587,7 @@ void idr_remove(struct idr *idp, int id) --idp->layers; to_free->count = 0; bitmap_clear(to_free->bitmap, 0, IDR_SIZE); - free_layer(to_free); + free_layer(idp, to_free); } while (idp->id_free_cnt >= MAX_IDR_FREE) { p = get_from_free_list(idp); @@ -622,7 +628,7 @@ void __idr_remove_all(struct idr *idp) /* Get the highest bit that the above add changed from 0->1. */ while (n < fls(id ^ bt_mask)) { if (p) - free_layer(p); + free_layer(idp, p); n += IDR_BITS; p = *--paa; } @@ -655,19 +661,7 @@ void idr_destroy(struct idr *idp) } EXPORT_SYMBOL(idr_destroy); -/** - * idr_find - return pointer for given id - * @idp: idr handle - * @id: lookup key - * - * Return the pointer given the id it has been registered with. A %NULL - * return indicates that @id is not valid or you passed %NULL in - * idr_get_new(). - * - * This function can be called under rcu_read_lock(), given that the leaf - * pointers lifetimes are correctly managed. - */ -void *idr_find(struct idr *idp, int id) +void *idr_find_slowpath(struct idr *idp, int id) { int n; struct idr_layer *p; @@ -691,7 +685,7 @@ void *idr_find(struct idr *idp, int id) } return((void *)p); } -EXPORT_SYMBOL(idr_find); +EXPORT_SYMBOL(idr_find_slowpath); /** * idr_for_each - iterate through all stored pointers -- cgit v1.2.3 From 59fb1b9f5d9910c2eb97107dd0eb7e3bce8f0dde Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 27 Feb 2013 17:05:11 -0800 Subject: ipmi: remove superfluous kernel/userspace explanation Given the obvious distinction between kernel and userspace supported by uapi/, it seems unnecessary to comment on that. Signed-off-by: Robert P. J. Day Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 4 ---- include/uapi/linux/ipmi.h | 10 +--------- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include - -/* - * The in-kernel interface. - */ #include #include diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 33fbc99b3812..7b26a62e5707 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -59,15 +59,7 @@ * if it becomes full and it is queried once a second to see if * anything is in it. Incoming commands to the driver will get * delivered as commands. - * - * This driver provides two main interfaces: one for in-kernel - * applications and another for userland applications. The - * capabilities are basically the same for both interface, although - * the interfaces are somewhat different. The stuff in the - * #ifdef __KERNEL__ below is the in-kernel interface. The userland - * interface is defined later in the file. */ - - + */ /* * This is an overlay for all the address types, so it's easy to -- cgit v1.2.3 From 1d730c49a91dc5b7660269b98ad76e5a9b85740f Mon Sep 17 00:00:00 2001 From: Martin Sustrik Date: Wed, 27 Feb 2013 17:05:42 -0800 Subject: include/linux/eventfd.h: fix incorrect filename is a comment Comment in eventfd.h referred to 'include/asm-generic/fcntl.h' while the correct path is 'include/uapi/asm-generic/fcntl.h'. Signed-off-by: Martin Sustrik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventfd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 3c3ef19a625a..cf5d2af61b81 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,7 +13,7 @@ #include /* - * CAREFUL: Check include/asm-generic/fcntl.h when defining + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want * to re-use O_* flags that couldn't possibly have a meaning * from eventfd, in order to leave a free define-space for -- cgit v1.2.3 From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Feb 2013 17:06:00 -0800 Subject: hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin Acked-by: Paul E. McKenney Signed-off-by: Sasha Levin Cc: Wu Fengguang Cc: Marcelo Tosatti Cc: Gleb Natapov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/kprobes.c | 6 +- arch/ia64/kernel/kprobes.c | 8 +-- arch/mips/kernel/kprobes.c | 6 +- arch/powerpc/kernel/kprobes.c | 6 +- arch/powerpc/kvm/book3s_mmu_hpte.c | 18 ++--- arch/s390/kernel/kprobes.c | 8 +-- arch/s390/pci/pci_msi.c | 3 +- arch/sh/kernel/kprobes.c | 6 +- arch/sparc/kernel/kprobes.c | 6 +- arch/sparc/kernel/ldc.c | 3 +- arch/x86/kernel/kprobes/core.c | 8 +-- arch/x86/kvm/mmu.c | 26 +++---- block/blk-cgroup.c | 6 +- block/blk-ioc.c | 3 +- block/bsg.c | 3 +- block/cfq-iosched.c | 3 +- block/elevator.c | 4 +- crypto/algapi.c | 6 +- drivers/atm/atmtcp.c | 6 +- drivers/atm/eni.c | 3 +- drivers/atm/he.c | 3 +- drivers/atm/solos-pci.c | 3 +- drivers/clk/clk.c | 59 ++++++---------- drivers/gpu/drm/drm_hashtab.c | 19 +++-- drivers/infiniband/core/cma.c | 3 +- drivers/infiniband/core/fmr_pool.c | 3 +- drivers/isdn/mISDN/socket.c | 3 +- drivers/isdn/mISDN/stack.c | 3 +- drivers/md/dm-bio-prison.c | 3 +- drivers/md/dm-bufio.c | 3 +- drivers/md/dm-snap.c | 3 +- .../md/persistent-data/dm-transaction-manager.c | 7 +- drivers/md/raid5.c | 3 +- drivers/misc/sgi-gru/grutlbpurge.c | 3 +- drivers/misc/vmw_vmci/vmci_doorbell.c | 7 +- drivers/misc/vmw_vmci/vmci_resource.c | 6 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 18 ++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 +-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c | 10 +-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 10 +-- drivers/net/ethernet/sun/sunvnet.c | 3 +- drivers/net/macvlan.c | 6 +- drivers/net/tun.c | 15 ++-- drivers/net/vxlan.c | 12 ++-- drivers/net/wireless/zd1201.c | 7 +- drivers/pci/pci.c | 12 ++-- drivers/staging/android/binder.c | 19 ++--- drivers/target/tcm_fc/tfc_sess.c | 12 ++-- fs/affs/amigaffs.c | 3 +- fs/aio.c | 3 +- fs/cifs/inode.c | 3 +- fs/dcache.c | 9 +-- fs/dlm/lowcomms.c | 11 ++- fs/ecryptfs/messaging.c | 6 +- fs/exportfs/expfs.c | 3 +- fs/fat/inode.c | 3 +- fs/fat/nfs.c | 3 +- fs/fscache/cookie.c | 11 ++- fs/inode.c | 19 ++--- fs/lockd/host.c | 29 ++++---- fs/lockd/svcsubs.c | 7 +- fs/nfs/pnfs_dev.c | 9 +-- fs/nfsd/nfscache.c | 3 +- fs/notify/fsnotify.c | 3 +- fs/notify/inode_mark.c | 19 +++-- fs/notify/vfsmount_mark.c | 19 +++-- fs/ocfs2/dcache.c | 3 +- fs/ocfs2/dlm/dlmrecovery.c | 6 +- fs/super.c | 6 +- fs/sysfs/bin.c | 3 +- fs/xfs/xfs_log_recover.c | 3 +- include/linux/hashtable.h | 40 +++++------ include/linux/if_team.h | 6 +- include/linux/list.h | 49 ++++++------- include/linux/pid.h | 3 +- include/linux/rculist.h | 56 +++++++-------- include/net/ax25.h | 8 +-- include/net/inet_hashtables.h | 4 +- include/net/inet_timewait_sock.h | 8 +-- include/net/netrom.h | 16 ++--- include/net/sch_generic.h | 3 +- include/net/sctp/sctp.h | 4 +- include/net/sock.h | 21 +++--- kernel/cgroup.c | 12 ++-- kernel/events/core.c | 6 +- kernel/kprobes.c | 35 ++++----- kernel/pid.c | 3 +- kernel/sched/core.c | 6 +- kernel/smpboot.c | 2 +- kernel/trace/ftrace.c | 24 +++---- kernel/trace/trace_output.c | 3 +- kernel/tracepoint.c | 6 +- kernel/user-return-notifier.c | 4 +- kernel/user.c | 3 +- kernel/workqueue.c | 13 ++-- lib/debugobjects.c | 21 +++--- lib/lru_cache.c | 3 +- mm/huge_memory.c | 3 +- mm/kmemleak.c | 9 ++- mm/ksm.c | 15 ++-- mm/mmu_notifier.c | 18 ++--- net/9p/error.c | 4 +- net/9p/trans_virtio.c | 2 +- net/appletalk/ddp.c | 9 +-- net/atm/common.c | 7 +- net/atm/lec.c | 66 ++++++++--------- net/atm/signaling.c | 3 +- net/ax25/af_ax25.c | 15 ++-- net/ax25/ax25_ds_subr.c | 6 +- net/ax25/ax25_ds_timer.c | 3 +- net/ax25/ax25_iface.c | 3 +- net/ax25/ax25_uid.c | 11 ++- net/batman-adv/bat_iv_ogm.c | 12 ++-- net/batman-adv/bridge_loop_avoidance.c | 39 ++++------ net/batman-adv/distributed-arp-table.c | 15 ++-- net/batman-adv/gateway_client.c | 13 ++-- net/batman-adv/main.c | 6 +- net/batman-adv/originator.c | 31 ++++---- net/batman-adv/originator.h | 3 +- net/batman-adv/routing.c | 6 +- net/batman-adv/send.c | 6 +- net/batman-adv/translation-table.c | 82 +++++++++------------- net/batman-adv/vis.c | 38 ++++------ net/bluetooth/hci_sock.c | 15 ++-- net/bluetooth/rfcomm/sock.c | 13 ++-- net/bluetooth/sco.c | 14 ++-- net/bridge/br_fdb.c | 23 +++--- net/bridge/br_mdb.c | 6 +- net/bridge/br_multicast.c | 25 +++---- net/can/af_can.c | 18 +++-- net/can/gw.c | 15 ++-- net/can/proc.c | 3 +- net/core/dev.c | 12 ++-- net/core/flow.c | 11 ++- net/core/net-procfs.c | 3 +- net/core/rtnetlink.c | 3 +- net/decnet/af_decnet.c | 9 +-- net/decnet/dn_table.c | 13 ++-- net/ieee802154/dgram.c | 3 +- net/ieee802154/raw.c | 3 +- net/ipv4/devinet.c | 10 +-- net/ipv4/fib_frontend.c | 15 ++-- net/ipv4/fib_semantics.c | 23 +++--- net/ipv4/fib_trie.c | 33 ++++----- net/ipv4/inet_connection_sock.c | 10 ++- net/ipv4/inet_fragment.c | 10 ++- net/ipv4/inet_hashtables.c | 8 +-- net/ipv4/inet_timewait_sock.c | 7 +- net/ipv4/raw.c | 8 +-- net/ipv4/tcp_ipv4.c | 7 +- net/ipv6/addrconf.c | 32 +++------ net/ipv6/addrlabel.c | 18 +++-- net/ipv6/inet6_connection_sock.c | 5 +- net/ipv6/ip6_fib.c | 12 ++-- net/ipv6/raw.c | 3 +- net/ipv6/xfrm6_tunnel.c | 10 ++- net/ipx/af_ipx.c | 16 ++--- net/ipx/ipx_proc.c | 5 +- net/iucv/af_iucv.c | 21 ++---- net/key/af_key.c | 3 +- net/l2tp/l2tp_core.c | 12 ++-- net/l2tp/l2tp_ip.c | 3 +- net/l2tp/l2tp_ip6.c | 3 +- net/llc/llc_sap.c | 3 +- net/mac80211/mesh_pathtbl.c | 45 +++++------- net/netfilter/ipvs/ip_vs_conn.c | 26 +++---- net/netfilter/nf_conntrack_expect.c | 17 ++--- net/netfilter/nf_conntrack_helper.c | 13 ++-- net/netfilter/nf_conntrack_netlink.c | 9 ++- net/netfilter/nf_conntrack_sip.c | 8 +-- net/netfilter/nf_nat_core.c | 3 +- net/netfilter/nfnetlink_cthelper.c | 17 ++--- net/netfilter/nfnetlink_log.c | 7 +- net/netfilter/nfnetlink_queue_core.c | 10 ++- net/netfilter/xt_RATEEST.c | 3 +- net/netfilter/xt_connlimit.c | 8 +-- net/netfilter/xt_hashlimit.c | 16 ++--- net/netlink/af_netlink.c | 30 +++----- net/netrom/af_netrom.c | 12 ++-- net/netrom/nr_route.c | 30 ++++---- net/nfc/llcp/llcp.c | 16 ++--- net/openvswitch/datapath.c | 10 ++- net/openvswitch/flow.c | 13 ++-- net/openvswitch/vport.c | 3 +- net/packet/af_packet.c | 3 +- net/packet/diag.c | 3 +- net/phonet/pep.c | 3 +- net/phonet/socket.c | 9 +-- net/rds/bind.c | 3 +- net/rds/connection.c | 9 +-- net/rose/af_rose.c | 14 ++-- net/sched/sch_api.c | 4 +- net/sched/sch_cbq.c | 18 ++--- net/sched/sch_drr.c | 10 ++- net/sched/sch_hfsc.c | 15 ++-- net/sched/sch_htb.c | 12 ++-- net/sched/sch_qfq.c | 16 ++--- net/sctp/endpointola.c | 3 +- net/sctp/input.c | 6 +- net/sctp/proc.c | 9 +-- net/sctp/socket.c | 9 ++- net/sunrpc/auth.c | 5 +- net/sunrpc/cache.c | 4 +- net/sunrpc/svcauth.c | 3 +- net/tipc/name_table.c | 8 +-- net/tipc/node.c | 3 +- net/unix/af_unix.c | 6 +- net/unix/diag.c | 7 +- net/x25/af_x25.c | 12 ++-- net/xfrm/xfrm_policy.c | 47 ++++++------- net/xfrm/xfrm_state.c | 42 +++++------ security/integrity/ima/ima_queue.c | 3 +- security/selinux/avc.c | 19 ++--- tools/perf/util/evlist.c | 3 +- virt/kvm/eventfd.c | 3 +- virt/kvm/irq_comm.c | 18 ++--- 218 files changed, 987 insertions(+), 1494 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 4dd41fc9e235..170e9f34003f 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -395,7 +395,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; @@ -415,7 +415,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -442,7 +442,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 7026b29e277a..f8280a766a78 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -423,7 +423,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = ((struct fnptr *)kretprobe_trampoline)->ip; @@ -444,7 +444,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -461,7 +461,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->cr_iip = orig_ret_address; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -487,7 +487,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 158467da9bc1..ce3f0807ad1e 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -598,7 +598,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)kretprobe_trampoline; @@ -618,7 +618,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -645,7 +645,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e88c64331819..11f5b03a0b06 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -310,7 +310,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; @@ -330,7 +330,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -357,7 +357,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 2c86b0d63714..da8b13c4b776 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -124,7 +124,6 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hpte_cache *pte; - struct hlist_node *node; int i; rcu_read_lock(); @@ -132,7 +131,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) invalidate_pte(vcpu, pte); } @@ -143,7 +142,6 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ @@ -152,7 +150,7 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_pte) + hlist_for_each_entry_rcu(pte, list, list_pte) if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) invalidate_pte(vcpu, pte); @@ -163,7 +161,6 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ @@ -173,7 +170,7 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + hlist_for_each_entry_rcu(pte, list, list_pte_long) if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) invalidate_pte(vcpu, pte); @@ -207,7 +204,6 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; @@ -216,7 +212,7 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_vpte) + hlist_for_each_entry_rcu(pte, list, list_vpte) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); @@ -228,7 +224,6 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; @@ -238,7 +233,7 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); @@ -266,7 +261,6 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); - struct hlist_node *node; struct hpte_cache *pte; int i; @@ -277,7 +271,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && (pte->pte.raddr < pa_end)) invalidate_pte(vcpu, pte); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d1c7214e157c..3388b2b2a07d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -354,7 +354,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address; unsigned long trampoline_address; kprobe_opcode_t *correct_ret_addr; @@ -379,7 +379,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, orig_ret_address = 0; correct_ret_addr = NULL; trampoline_address = (unsigned long) &kretprobe_trampoline; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -398,7 +398,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -427,7 +427,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c index 90fd3482b9e2..0297931335e1 100644 --- a/arch/s390/pci/pci_msi.c +++ b/arch/s390/pci/pci_msi.c @@ -25,10 +25,9 @@ static DEFINE_SPINLOCK(msi_map_lock); struct msi_desc *__irq_get_msi_desc(unsigned int irq) { - struct hlist_node *entry; struct msi_map *map; - hlist_for_each_entry_rcu(map, entry, + hlist_for_each_entry_rcu(map, &msi_hash[msi_hashfn(irq)], msi_chain) if (map->irq == irq) return map->msi; diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 1208b09e95c3..42b46e61a2d5 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -310,7 +310,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; @@ -330,7 +330,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -360,7 +360,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index a39d1ba5a119..e72212148d2a 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -511,7 +511,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; @@ -531,7 +531,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -559,7 +559,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 9fcc6b4e93b3..54df554b82d9 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -953,9 +953,8 @@ static HLIST_HEAD(ldc_channel_list); static int __ldc_channel_exists(unsigned long id) { struct ldc_channel *lp; - struct hlist_node *n; - hlist_for_each_entry(lp, n, &ldc_channel_list, list) { + hlist_for_each_entry(lp, &ldc_channel_list, list) { if (lp->id == id) return 1; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index e124554598ee..3f06e6149981 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; @@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * will be the real return address, and all the rest will * point to kretprobe_trampoline. */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4ed3edbe06bd..956ca358108a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1644,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn)) {} else -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn) || (sp)->role.direct || \ (sp)->role.invalid) {} else @@ -1706,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; LIST_HEAD(invalid_list); bool flush = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!s->unsync) continue; @@ -1848,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role; unsigned quadrant; struct kvm_mmu_page *sp; - struct hlist_node *node; bool need_sync = false; role = vcpu->arch.mmu.base_role; @@ -1863,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } - for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_sp(vcpu->kvm, sp, gfn) { if (!need_sync && sp->unsync) need_sync = true; @@ -2151,14 +2149,13 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) { struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); int r; pgprintk("%s: looking for gfn %llx\n", __func__, gfn); r = 0; spin_lock(&kvm->mmu_lock); - for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { pgprintk("%s: gfn %llx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -2288,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (s->unsync) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); @@ -2302,10 +2298,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *s; - struct hlist_node *node; bool need_unsync = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!can_unsync) return 1; @@ -3933,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; @@ -3964,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; - for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b8858fb0cafa..8bdebb6781e1 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -357,7 +357,6 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, { struct blkcg *blkcg = cgroup_to_blkcg(cgroup); struct blkcg_gq *blkg; - struct hlist_node *n; int i; mutex_lock(&blkcg_pol_mutex); @@ -368,7 +367,7 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, * stat updates. This is a debug feature which shouldn't exist * anyway. If you get hit by a race, retry. */ - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -415,11 +414,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total) { struct blkcg_gq *blkg; - struct hlist_node *n; u64 total = 0; spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); spin_unlock_irq(&blkcg->lock); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index fab4cdd3f7bb..9c4bb8266bc8 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -164,7 +164,6 @@ EXPORT_SYMBOL(put_io_context); */ void put_io_context_active(struct io_context *ioc) { - struct hlist_node *n; unsigned long flags; struct io_cq *icq; @@ -180,7 +179,7 @@ void put_io_context_active(struct io_context *ioc) */ retry: spin_lock_irqsave_nested(&ioc->lock, flags, 1); - hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) { + hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { if (icq->flags & ICQ_EXITED) continue; if (spin_trylock(icq->q->queue_lock)) { diff --git a/block/bsg.c b/block/bsg.c index 3ca92ebf6bbb..420a5a9f1b23 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -800,11 +800,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode, static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) { struct bsg_device *bd; - struct hlist_node *entry; mutex_lock(&bsg_mutex); - hlist_for_each_entry(bd, entry, bsg_dev_idx_hash(minor), dev_list) { + hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { atomic_inc(&bd->ref_count); goto found; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e62e9205b80a..ec52807cdd09 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1435,7 +1435,6 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkcg_gq *blkg; - struct hlist_node *n; if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) return -EINVAL; @@ -1443,7 +1442,7 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) spin_lock_irq(&blkcg->lock); blkcg->cfq_weight = (unsigned int)val; - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); if (cfqg && !cfqg->dev_weight) diff --git a/block/elevator.c b/block/elevator.c index 603b2c178740..d0acb31cc083 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -288,10 +288,10 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) { struct elevator_queue *e = q->elevator; struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; - struct hlist_node *entry, *next; + struct hlist_node *next; struct request *rq; - hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { + hlist_for_each_entry_safe(rq, next, hash_list, hash) { BUG_ON(!ELV_ON_HASH(rq)); if (unlikely(!rq_mergeable(rq))) { diff --git a/crypto/algapi.c b/crypto/algapi.c index 08c57c8aec95..6149a6e09643 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -447,7 +447,7 @@ EXPORT_SYMBOL_GPL(crypto_register_template); void crypto_unregister_template(struct crypto_template *tmpl) { struct crypto_instance *inst; - struct hlist_node *p, *n; + struct hlist_node *n; struct hlist_head *list; LIST_HEAD(users); @@ -457,7 +457,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) list_del_init(&tmpl->list); list = &tmpl->instances; - hlist_for_each_entry(inst, p, list, list) { + hlist_for_each_entry(inst, list, list) { int err = crypto_remove_alg(&inst->alg, &users); BUG_ON(err); } @@ -466,7 +466,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) up_write(&crypto_alg_sem); - hlist_for_each_entry_safe(inst, p, n, list, list) { + hlist_for_each_entry_safe(inst, n, list, list) { BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1); tmpl->free(inst); } diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b22d71cac54c..0e3f8f9dcd29 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -157,7 +157,6 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) { struct atm_cirange ci; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; int i; @@ -171,7 +170,7 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev != dev) continue; @@ -264,12 +263,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index c1eb6fa8ac35..b1955ba40d63 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2093,7 +2093,6 @@ static unsigned char eni_phy_get(struct atm_dev *dev,unsigned long addr) static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) { - struct hlist_node *node; struct sock *s; static const char *signal[] = { "LOST","unknown","okay" }; struct eni_dev *eni_dev = ENI_DEV(dev); @@ -2171,7 +2170,7 @@ static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { struct eni_vcc *eni_vcc; int length; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 72b6960fa95f..d6891267f5bb 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -329,7 +329,6 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; short vpi; int vci; @@ -338,7 +337,7 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) vci = cid & ((1 << he_dev->vcibits) - 1); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == he_dev->atm_dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 0474a89170b9..32784d18d1f7 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -896,12 +896,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc = NULL; - struct hlist_node *node; struct sock *s; read_lock(&vcc_sklist_lock); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE && diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fabbfe1a9253..ed87b2405806 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -52,31 +52,29 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_summary_show_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) + hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); } static int clk_summary_show(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; seq_printf(s, " clock enable_cnt prepare_cnt rate\n"); seq_printf(s, "---------------------------------------------------------------------\n"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) + hlist_for_each_entry(c, &clk_root_list, child_node) clk_summary_show_subtree(s, c, 0); - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(c, &clk_orphan_list, child_node) clk_summary_show_subtree(s, c, 0); mutex_unlock(&prepare_lock); @@ -111,14 +109,13 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level) static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_dump_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) { + hlist_for_each_entry(child, &c->children, child_node) { seq_printf(s, ","); clk_dump_subtree(s, child, level + 1); } @@ -129,21 +126,20 @@ static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) static int clk_dump(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; bool first_node = true; seq_printf(s, "{"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(c, &clk_root_list, child_node) { if (!first_node) seq_printf(s, ","); first_node = false; clk_dump_subtree(s, c, 0); } - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(c, &clk_orphan_list, child_node) { seq_printf(s, ","); clk_dump_subtree(s, c, 0); } @@ -222,7 +218,6 @@ out: static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) { struct clk *child; - struct hlist_node *tmp; int ret = -EINVAL;; if (!clk || !pdentry) @@ -233,7 +228,7 @@ static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) if (ret) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_debug_create_subtree(child, clk->dentry); ret = 0; @@ -299,7 +294,6 @@ out: static int __init clk_debug_init(void) { struct clk *clk; - struct hlist_node *tmp; struct dentry *d; rootdir = debugfs_create_dir("clk", NULL); @@ -324,10 +318,10 @@ static int __init clk_debug_init(void) mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_debug_create_subtree(clk, rootdir); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_debug_create_subtree(clk, orphandir); inited = 1; @@ -345,13 +339,12 @@ static inline int clk_debug_register(struct clk *clk) { return 0; } static void clk_disable_unused_subtree(struct clk *clk) { struct clk *child; - struct hlist_node *tmp; unsigned long flags; if (!clk) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_disable_unused_subtree(child); spin_lock_irqsave(&enable_lock, flags); @@ -384,14 +377,13 @@ out: static int clk_disable_unused(void) { struct clk *clk; - struct hlist_node *tmp; mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_disable_unused_subtree(clk); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_disable_unused_subtree(clk); mutex_unlock(&prepare_lock); @@ -484,12 +476,11 @@ static struct clk *__clk_lookup_subtree(const char *name, struct clk *clk) { struct clk *child; struct clk *ret; - struct hlist_node *tmp; if (!strcmp(clk->name, name)) return clk; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_lookup_subtree(name, child); if (ret) return ret; @@ -502,20 +493,19 @@ struct clk *__clk_lookup(const char *name) { struct clk *root_clk; struct clk *ret; - struct hlist_node *tmp; if (!name) return NULL; /* search the 'proper' clk tree first */ - hlist_for_each_entry(root_clk, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(root_clk, &clk_root_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; } /* if not found, then search the orphan tree */ - hlist_for_each_entry(root_clk, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; @@ -812,7 +802,6 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) { unsigned long old_rate; unsigned long parent_rate = 0; - struct hlist_node *tmp; struct clk *child; old_rate = clk->rate; @@ -832,7 +821,7 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) if (clk->notifier_count && msg) __clk_notify(clk, msg, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) __clk_recalc_rates(child, msg); } @@ -878,7 +867,6 @@ EXPORT_SYMBOL_GPL(clk_get_rate); */ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) { - struct hlist_node *tmp; struct clk *child; unsigned long new_rate; int ret = NOTIFY_DONE; @@ -895,7 +883,7 @@ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) if (ret == NOTIFY_BAD) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_speculate_rates(child, new_rate); if (ret == NOTIFY_BAD) break; @@ -908,11 +896,10 @@ out: static void clk_calc_subtree(struct clk *clk, unsigned long new_rate) { struct clk *child; - struct hlist_node *tmp; clk->new_rate = new_rate; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { if (child->ops->recalc_rate) child->new_rate = child->ops->recalc_rate(child->hw, new_rate); else @@ -983,7 +970,6 @@ out: */ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event) { - struct hlist_node *tmp; struct clk *child, *fail_clk = NULL; int ret = NOTIFY_DONE; @@ -996,7 +982,7 @@ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even fail_clk = clk; } - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { clk = clk_propagate_rate_change(child, event); if (clk) fail_clk = clk; @@ -1014,7 +1000,6 @@ static void clk_change_rate(struct clk *clk) struct clk *child; unsigned long old_rate; unsigned long best_parent_rate = 0; - struct hlist_node *tmp; old_rate = clk->rate; @@ -1032,7 +1017,7 @@ static void clk_change_rate(struct clk *clk) if (clk->notifier_count && old_rate != clk->rate) __clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_change_rate(child); } @@ -1348,7 +1333,7 @@ int __clk_init(struct device *dev, struct clk *clk) { int i, ret = 0; struct clk *orphan; - struct hlist_node *tmp, *tmp2; + struct hlist_node *tmp2; if (!clk) return -EINVAL; @@ -1448,7 +1433,7 @@ int __clk_init(struct device *dev, struct clk *clk) * walk the list of orphan clocks and reparent any that are children of * this clock */ - hlist_for_each_entry_safe(orphan, tmp, tmp2, &clk_orphan_list, child_node) { + hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { if (orphan->ops->get_parent) { i = orphan->ops->get_parent(orphan->hw); if (!strcmp(clk->name, orphan->parent_names[i])) diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 80254547a3f8..7e4bae760e27 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -60,14 +60,13 @@ void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; int count = 0; hashed_key = hash_long(key, ht->order); DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) + hlist_for_each_entry(entry, h_list, head) DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); } @@ -76,14 +75,13 @@ static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -95,14 +93,13 @@ static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, list, h_list, head) { + hlist_for_each_entry_rcu(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -113,19 +110,19 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list, *parent; + struct hlist_node *parent; unsigned int hashed_key; unsigned long key = item->key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; parent = NULL; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) return -EINVAL; if (entry->key > key) break; - parent = list; + parent = &entry->head; } if (parent) { hlist_add_after_rcu(parent, &item->head); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c32eeaa3f3b1..71c2c7116802 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2204,10 +2204,9 @@ static int cma_check_port(struct rdma_bind_list *bind_list, { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; - struct hlist_node *node; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 176c8f90f2bb..9f5ad7cc33c8 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, { struct hlist_head *bucket; struct ib_pool_fmr *fmr; - struct hlist_node *pos; if (!pool->cache_bucket) return NULL; bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - hlist_for_each_entry(fmr, pos, bucket, cache_node) + hlist_for_each_entry(fmr, bucket, cache_node) if (io_virtual_address == fmr->io_virtual_address && page_list_len == fmr->page_list_len && !memcmp(page_list, fmr->page_list, diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index abe2d699b6f3..8b07f83d48ad 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -483,7 +483,6 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; - struct hlist_node *node; struct sock *csk; int err = 0; @@ -508,7 +507,7 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (sk->sk_protocol < ISDN_P_B_START) { read_lock_bh(&data_sockets.lock); - sk_for_each(csk, node, &data_sockets.head) { + sk_for_each(csk, &data_sockets.head) { if (sk == csk) continue; if (_pms(csk)->dev != _pms(sk)->dev) diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index deda591f70b9..9cb4b621fbc3 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -64,12 +64,11 @@ unlock: static void send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) { - struct hlist_node *node; struct sock *sk; struct sk_buff *cskb = NULL; read_lock(&sl->lock); - sk_for_each(sk, node, &sl->head) { + sk_for_each(sk, &sl->head) { if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index aefb78e3cbf9..d9d3f1c7b662 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -106,9 +106,8 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, struct dm_cell_key *key) { struct dm_bio_prison_cell *cell; - struct hlist_node *tmp; - hlist_for_each_entry(cell, tmp, bucket, list) + hlist_for_each_entry(cell, bucket, list) if (keys_equal(&cell->key, key)) return cell; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 651ca79881dd..93205e32a004 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -859,9 +859,8 @@ static void __check_watermark(struct dm_bufio_client *c) static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) { struct dm_buffer *b; - struct hlist_node *hn; - hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], + hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) { dm_bufio_cond_resched(); if (b->block == block) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 59fc18ae52c2..10079e07edf4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -227,12 +227,11 @@ static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) { struct dm_snap_tracked_chunk *c; - struct hlist_node *hn; int found = 0; spin_lock_irq(&s->tracked_chunk_lock); - hlist_for_each_entry(c, hn, + hlist_for_each_entry(c, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { if (c->chunk == chunk) { found = 1; diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 7b17a1fdeaf9..81da1a26042e 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -46,10 +46,9 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) int r = 0; unsigned bucket = dm_hash_block(b, DM_HASH_MASK); struct shadow_info *si; - struct hlist_node *n; spin_lock(&tm->lock); - hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) + hlist_for_each_entry(si, tm->buckets + bucket, hlist) if (si->where == b) { r = 1; break; @@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) static void wipe_shadow_table(struct dm_transaction_manager *tm) { struct shadow_info *si; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; int i; spin_lock(&tm->lock); for (i = 0; i < DM_HASH_SIZE; i++) { bucket = tm->buckets + i; - hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) + hlist_for_each_entry_safe(si, tmp, bucket, hlist) kfree(si); INIT_HLIST_HEAD(bucket); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 19d77a026639..697f026cb318 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -365,10 +365,9 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, short generation) { struct stripe_head *sh; - struct hlist_node *hn; pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); - hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) + hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) if (sh->sector == sector && sh->generation == generation) return sh; pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 240a6d361665..2129274ef7ab 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *mn, *gru_mn = NULL; - struct hlist_node *n; if (mm->mmu_notifier_mm) { rcu_read_lock(); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) if (mn->ops == ops) { gru_mn = mn; diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index c3e8397f62ed..a8cee33ae8d2 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -127,9 +127,8 @@ static struct dbell_entry *dbell_index_table_find(u32 idx) { u32 bucket = VMCI_DOORBELL_HASH(idx); struct dbell_entry *dbell; - struct hlist_node *node; - hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket], + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (idx == dbell->idx) return dbell; @@ -359,12 +358,10 @@ static void dbell_fire_entries(u32 notify_idx) { u32 bucket = VMCI_DOORBELL_HASH(notify_idx); struct dbell_entry *dbell; - struct hlist_node *node; spin_lock_bh(&vmci_doorbell_it.lock); - hlist_for_each_entry(dbell, node, - &vmci_doorbell_it.entries[bucket], node) { + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (dbell->idx == notify_idx && atomic_read(&dbell->active) == 1) { if (dbell->run_delayed) { diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index a196f84a4fd2..9a53a30de445 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -46,11 +46,10 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, enum vmci_resource_type type) { struct vmci_resource *r, *resource = NULL; - struct hlist_node *node; unsigned int idx = vmci_resource_hash(handle); rcu_read_lock(); - hlist_for_each_entry_rcu(r, node, + hlist_for_each_entry_rcu(r, &vmci_resource_table.entries[idx], node) { u32 cid = r->handle.context; u32 rid = r->handle.resource; @@ -146,12 +145,11 @@ void vmci_resource_remove(struct vmci_resource *resource) struct vmci_handle handle = resource->handle; unsigned int idx = vmci_resource_hash(handle); struct vmci_resource *r; - struct hlist_node *node; /* Remove resource from hash table. */ spin_lock(&vmci_resource_table.lock); - hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) { + hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { if (vmci_handle_is_equal(r->handle, resource->handle)) { hlist_del_init_rcu(&r->node); break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f4d2e9e3c6d5..c3f1afd86906 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2197,13 +2197,13 @@ static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, union ixgbe_atr_input *mask = &adapter->fdir_mask; struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule = NULL; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (fsp->location <= rule->sw_idx) break; @@ -2264,14 +2264,14 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule; int cnt = 0; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (cnt == cmd->rule_cnt) return -EMSGSIZE; @@ -2358,19 +2358,19 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2, *parent; - struct ixgbe_fdir_filter *rule; + struct hlist_node *node2; + struct ixgbe_fdir_filter *rule, *parent; int err = -EINVAL; parent = NULL; rule = NULL; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { /* hash found, or no matching entry */ if (rule->sw_idx >= sw_idx) break; - parent = node; + parent = rule; } /* if there is an old rule occupying our place remove it */ @@ -2399,7 +2399,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(parent, &input->fdir_node); + hlist_add_after(&parent->fdir_node, &input->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 68478d6dfa2d..db5611ae407e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3891,7 +3891,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); @@ -3899,7 +3899,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) if (!hlist_empty(&adapter->fdir_filter_list)) ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { ixgbe_fdir_write_perfect_filter_82599(hw, &filter->filter, @@ -4356,12 +4356,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { hlist_del(&filter->fdir_node); kfree(filter); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5385474bb526..bb4d8d99f36d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -225,11 +225,10 @@ static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { - struct hlist_node *elem; struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; - hlist_for_each_entry(filter, elem, + hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { @@ -574,13 +573,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; unsigned int mac_hash; mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, priv->dev->dev_addr)) { en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n", @@ -609,11 +608,11 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, struct hlist_head *bucket; unsigned int mac_hash; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac); bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, prev_mac)) { mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); @@ -1019,7 +1018,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, { struct netdev_hw_addr *ha; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; bool found; u64 mac; int err = 0; @@ -1035,7 +1034,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, /* find what to remove */ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = &priv->mac_hash[i]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { found = false; netdev_for_each_uc_addr(ha, dev) { if (ether_addr_equal_64bits(entry->mac, @@ -1078,7 +1077,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, netdev_for_each_uc_addr(ha, dev) { found = false; bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry(entry, n, bucket, hlist) { + hlist_for_each_entry(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ha->addr)) { found = true; break; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ce38654bbdd0..c7f856308e1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -617,7 +618,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; - struct hlist_node *n; struct hlist_head *bucket; unsigned int mac_hash; @@ -625,7 +625,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; rcu_read_lock(); - hlist_for_each_entry_rcu(entry, n, bucket, hlist) { + hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ethh->h_source)) { rcu_read_unlock(); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 325e11e1ce0f..f89cc7a3fe6c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -576,7 +576,7 @@ void qlcnic_free_mac_list(struct qlcnic_adapter *adapter) void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; unsigned long time; @@ -584,7 +584,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; time = tmp_fil->ftime; @@ -604,7 +604,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->rx_fhash.fbucket_size; i++) { head = &(adapter->rx_fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { time = tmp_fil->ftime; if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) { @@ -621,14 +621,14 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; u8 cmd; for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; qlcnic_sre_macaddr_change(adapter, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 6387e0cc3ea9..0e630061bff3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -162,7 +162,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, { struct ethhdr *phdr = (struct ethhdr *)(skb->data); struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; unsigned long time; u64 src_addr = 0; @@ -179,7 +179,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { time = tmp_fil->ftime; @@ -205,7 +205,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); spin_lock(&adapter->rx_mac_learn_lock); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { found = 1; @@ -272,7 +272,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb) { struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; struct net_device *netdev = adapter->netdev; struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -294,7 +294,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, hindex = qlcnic_mac_hash(src_addr) & (adapter->fhash.fbucket_size - 1); head = &(adapter->fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 289b4eefb42f..1df0ff3839e8 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -614,10 +614,9 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) { unsigned int hash = vnet_hashfn(skb->data); struct hlist_head *hp = &vp->port_hash[hash]; - struct hlist_node *n; struct vnet_port *port; - hlist_for_each_entry(port, n, hp, hash) { + hlist_for_each_entry(port, hp, hash) { if (ether_addr_equal(port->raddr, skb->data)) return port; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index defcd8a85744..417b2af1aa80 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -55,9 +55,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { struct macvlan_dev *vlan; - struct hlist_node *n; - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) { if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr)) return vlan; } @@ -149,7 +148,6 @@ static void macvlan_broadcast(struct sk_buff *skb, { const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; - struct hlist_node *n; struct sk_buff *nskb; unsigned int i; int err; @@ -159,7 +157,7 @@ static void macvlan_broadcast(struct sk_buff *skb, return; for (i = 0; i < MACVLAN_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) { if (vlan->dev == src || !(vlan->mode & mode)) continue; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b6f45c5d84d5..2c6a22e278ea 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -197,9 +197,8 @@ static inline u32 tun_hashfn(u32 rxhash) static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) { struct tun_flow_entry *e; - struct hlist_node *n; - hlist_for_each_entry_rcu(e, n, head, hash_link) { + hlist_for_each_entry_rcu(e, head, hash_link) { if (e->rxhash == rxhash) return e; } @@ -241,9 +240,9 @@ static void tun_flow_flush(struct tun_struct *tun) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) tun_flow_delete(tun, e); } spin_unlock_bh(&tun->lock); @@ -256,9 +255,9 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { if (e->queue_index == queue_index) tun_flow_delete(tun, e); } @@ -279,9 +278,9 @@ static void tun_flow_cleanup(unsigned long data) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { unsigned long this_timer; count++; this_timer = e->updated + delay; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f736823f8437..f10e58ac9c1b 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -145,9 +145,8 @@ static inline struct hlist_head *vni_head(struct net *net, u32 id) static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) { struct vxlan_dev *vxlan; - struct hlist_node *node; - hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) { + hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { if (vxlan->vni == id) return vxlan; } @@ -292,9 +291,8 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, { struct hlist_head *head = vxlan_fdb_head(vxlan, mac); struct vxlan_fdb *f; - struct hlist_node *node; - hlist_for_each_entry_rcu(f, node, head, hlist) { + hlist_for_each_entry_rcu(f, head, hlist) { if (compare_ether_addr(mac, f->eth_addr) == 0) return f; } @@ -422,10 +420,9 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - struct hlist_node *n; int err; - hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) { + hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { if (idx < cb->args[0]) goto skip; @@ -483,11 +480,10 @@ static bool vxlan_group_used(struct vxlan_net *vn, const struct vxlan_dev *this) { const struct vxlan_dev *vxlan; - struct hlist_node *node; unsigned h; for (h = 0; h < VNI_HASH_SIZE; ++h) - hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) { + hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) { if (vxlan == this) continue; diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c index 48273dd05b63..4941f201d6c8 100644 --- a/drivers/net/wireless/zd1201.c +++ b/drivers/net/wireless/zd1201.c @@ -309,7 +309,6 @@ static void zd1201_usbrx(struct urb *urb) if (data[urb->actual_length-1] == ZD1201_PACKET_RXDATA) { int datalen = urb->actual_length-1; unsigned short len, fc, seq; - struct hlist_node *node; len = ntohs(*(__be16 *)&data[datalen-2]); if (len>datalen) @@ -362,7 +361,7 @@ static void zd1201_usbrx(struct urb *urb) hlist_add_head(&frag->fnode, &zd->fraglist); goto resubmit; } - hlist_for_each_entry(frag, node, &zd->fraglist, fnode) + hlist_for_each_entry(frag, &zd->fraglist, fnode) if (frag->seq == (seq&IEEE80211_SCTL_SEQ)) break; if (!frag) @@ -1831,14 +1830,14 @@ err_zd: static void zd1201_disconnect(struct usb_interface *interface) { struct zd1201 *zd = usb_get_intfdata(interface); - struct hlist_node *node, *node2; + struct hlist_node *node2; struct zd1201_frag *frag; if (!zd) return; usb_set_intfdata(interface, NULL); - hlist_for_each_entry_safe(frag, node, node2, &zd->fraglist, fnode) { + hlist_for_each_entry_safe(frag, node2, &zd->fraglist, fnode) { hlist_del_init(&frag->fnode); kfree_skb(frag->skb); kfree(frag); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 924e4665bd57..b099e0025d2b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -842,9 +842,8 @@ static struct pci_cap_saved_state *pci_find_saved_cap( struct pci_dev *pci_dev, char cap) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos; - hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) { if (tmp->cap.cap_nr == cap) return tmp; } @@ -1041,7 +1040,6 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) struct pci_saved_state *state; struct pci_cap_saved_state *tmp; struct pci_cap_saved_data *cap; - struct hlist_node *pos; size_t size; if (!dev->state_saved) @@ -1049,7 +1047,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) size = sizeof(*state) + sizeof(struct pci_cap_saved_data); - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; state = kzalloc(size, GFP_KERNEL); @@ -1060,7 +1058,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) sizeof(state->config_space)); cap = state->cap; - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) { size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; memcpy(cap, &tmp->cap, len); cap = (struct pci_cap_saved_data *)((u8 *)cap + len); @@ -2038,9 +2036,9 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) void pci_free_cap_save_buffers(struct pci_dev *dev) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next) + hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next) kfree(tmp); } diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 538ebe213129..24456a0de6b2 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2880,7 +2880,6 @@ static int binder_release(struct inode *nodp, struct file *filp) static void binder_deferred_release(struct binder_proc *proc) { - struct hlist_node *pos; struct binder_transaction *t; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; @@ -2924,7 +2923,7 @@ static void binder_deferred_release(struct binder_proc *proc) node->local_weak_refs = 0; hlist_add_head(&node->dead_node, &binder_dead_nodes); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) { + hlist_for_each_entry(ref, &node->refs, node_entry) { incoming_refs++; if (ref->death) { death++; @@ -3156,12 +3155,11 @@ static void print_binder_thread(struct seq_file *m, static void print_binder_node(struct seq_file *m, struct binder_node *node) { struct binder_ref *ref; - struct hlist_node *pos; struct binder_work *w; int count; count = 0; - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) count++; seq_printf(m, " node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d", @@ -3171,7 +3169,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) node->internal_strong_refs, count); if (count) { seq_puts(m, " proc"); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) seq_printf(m, " %d", ref->proc->pid); } seq_puts(m, "\n"); @@ -3369,7 +3367,6 @@ static void print_binder_proc_stats(struct seq_file *m, static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; struct binder_node *node; int do_lock = !binder_debug_no_lock; @@ -3380,10 +3377,10 @@ static int binder_state_show(struct seq_file *m, void *unused) if (!hlist_empty(&binder_dead_nodes)) seq_puts(m, "dead nodes:\n"); - hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node) + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) print_binder_node(m, node); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); if (do_lock) binder_unlock(__func__); @@ -3393,7 +3390,6 @@ static int binder_state_show(struct seq_file *m, void *unused) static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) @@ -3403,7 +3399,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) print_binder_stats(m, "", &binder_stats); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); if (do_lock) binder_unlock(__func__); @@ -3413,14 +3409,13 @@ static int binder_stats_show(struct seq_file *m, void *unused) static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) binder_lock(__func__); seq_puts(m, "binder transactions:\n"); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); if (do_lock) binder_unlock(__func__); diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 6659dd36e806..113f33598b9f 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -169,7 +169,6 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) { struct ft_tport *tport; struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; rcu_read_lock(); @@ -178,7 +177,7 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) goto out; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { kref_get(&sess->kref); rcu_read_unlock(); @@ -201,10 +200,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, { struct ft_sess *sess; struct hlist_head *head; - struct hlist_node *pos; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) + hlist_for_each_entry_rcu(sess, head, hash) if (sess->port_id == port_id) return sess; @@ -253,11 +251,10 @@ static void ft_sess_unhash(struct ft_sess *sess) static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { ft_sess_unhash(sess); return sess; @@ -273,12 +270,11 @@ static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) static void ft_sess_delete_all(struct ft_tport *tport) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; for (head = tport->hash; head < &tport->hash[FT_SESS_HASH_SIZE]; head++) { - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { ft_sess_unhash(sess); transport_deregister_session_configfs(sess->se_sess); ft_sess_put(sess); /* release from table */ diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index eb82ee53ee0b..d9a43674cb94 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,9 +125,8 @@ static void affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/aio.c b/fs/aio.c index 064bfbe37566..3f941f2a3059 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; - struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { + hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { /* * RCU protects us against accessing freed memory but * we have to be careful not to get a reference when the diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d2a833999bcc..83f2606c76d0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -816,10 +816,9 @@ static bool inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/dcache.c b/fs/dcache.c index 68220dd0c135..fbfae008ba44 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent); static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; - struct hlist_node *p; again: discon_alias = NULL; - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias); void d_prune_aliases(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -1443,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - struct hlist_node *p; if (!inode) { __d_instantiate(entry, NULL); return NULL; } - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index dd87a31bcc21..4f5ad246582f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid) static struct connection *__find_con(int nodeid) { int r; - struct hlist_node *h; struct connection *con; r = nodeid_hash(nodeid); - hlist_for_each_entry(con, h, &connection_hash[r], list) { + hlist_for_each_entry(con, &connection_hash[r], list) { if (con->nodeid == nodeid) return con; } @@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) static void foreach_conn(void (*conn_func)(struct connection *c)) { int i; - struct hlist_node *h, *n; + struct hlist_node *n; struct connection *con; for (i = 0; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ + hlist_for_each_entry_safe(con, n, &connection_hash[i], list) conn_func(con); - } } } @@ -257,13 +255,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) static struct connection *assoc2con(int assoc_id) { int i; - struct hlist_node *h; struct connection *con; mutex_lock(&connections_lock); for (i = 0 ; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry(con, h, &connection_hash[i], list) { + hlist_for_each_entry(con, &connection_hash[i], list) { if (con->sctp_assoc == assoc_id) { mutex_unlock(&connections_lock); return con; diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 5fa2471796c2..8d7a577ae497 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) */ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) { - struct hlist_node *elem; int rc; - hlist_for_each_entry(*daemon, elem, + hlist_for_each_entry(*daemon, &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], euid_chain) { if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) { @@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void) mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } if (ecryptfs_daemon_hash) { - struct hlist_node *elem; struct ecryptfs_daemon *daemon; int i; @@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void) for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; - hlist_for_each_entry(daemon, elem, + hlist_for_each_entry(daemon, &ecryptfs_daemon_hash[i], euid_chain) { rc = ecryptfs_exorcise_daemon(daemon); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 5df4bb4aab14..262fc9940982 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result, { struct dentry *dentry, *toput = NULL; struct inode *inode; - struct hlist_node *p; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 780e20806346..acf6e479b443 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; spin_lock(&sbi->inode_hash_lock); - hlist_for_each_entry(i, _p, head, i_fat_hash) { + hlist_for_each_entry(i, head, i_fat_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_pos != i_pos) continue; diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index ef4b5faba87b..499c10438ca2 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head; - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; head = sbi->dir_hashtable + fat_dir_hash(i_logstart); spin_lock(&sbi->dir_hash_lock); - hlist_for_each_entry(i, _p, head, i_dir_hash) { + hlist_for_each_entry(i, head, i_dir_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_logstart != i_logstart) continue; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 8dcb114758e3..e2cba1f60c21 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache, struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_n; int ret; _enter("%p,%p{%s}", cache, cookie, cookie->def->name); spin_lock(&cookie->lock); - hlist_for_each_entry(object, _n, &cookie->backing_objects, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (object->cache == cache) goto object_already_extant; @@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie, { struct fscache_object *p; struct fscache_cache *cache = object->cache; - struct hlist_node *_n; int ret; _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); @@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* there may be multiple initial creations of this object, but we only * want one */ ret = -EEXIST; - hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { + hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) ret = -ENOBUFS; @@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* pin the parent object */ spin_lock_nested(&cookie->parent->lock, 1); - hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, + hlist_for_each_entry(p, &cookie->parent->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) { @@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate); void __fscache_update_cookie(struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_p; fscache_stat(&fscache_n_updates); @@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) spin_lock(&cookie->lock); /* update the index entry on disk in each cache backing this cookie */ - hlist_for_each_entry(object, _p, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); } diff --git a/fs/inode.c b/fs/inode.c index 67880e604399..f5f7c06c36fb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb, int (*test)(struct inode *, void *), void *data) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_sb != sb) { spin_unlock(&inode->i_lock); @@ -830,11 +829,10 @@ repeat: static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_ino != ino) { spin_unlock(&inode->i_lock); @@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked); static int test_inode_iunique(struct super_block *sb, unsigned long ino) { struct hlist_head *b = inode_hashtable + hash(sb, ino); - struct hlist_node *node; struct inode *inode; spin_lock(&inode_hash_lock); - hlist_for_each_entry(inode, node, b, i_hash) { + hlist_for_each_entry(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) { spin_unlock(&inode_hash_lock); return 0; @@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode) struct hlist_head *head = inode_hashtable + hash(sb, ino); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) @@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode) } break; } - if (likely(!node)) { + if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); @@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct hlist_head *head = inode_hashtable + hash(sb, hashval); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) @@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } break; } - if (likely(!node)) { + if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0e17090c310f..abdd75d44dd4 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -32,15 +32,15 @@ static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH]; static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; -#define for_each_host(host, pos, chain, table) \ +#define for_each_host(host, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry((host), (pos), (chain), h_hash) + hlist_for_each_entry((host), (chain), h_hash) -#define for_each_host_safe(host, pos, next, chain, table) \ +#define for_each_host_safe(host, next, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry_safe((host), (pos), (next), \ + hlist_for_each_entry_safe((host), (next), \ (chain), h_hash) static unsigned long nrhosts; @@ -225,7 +225,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .net = net, }; struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; struct nsm_handle *nsm = NULL; struct lockd_net *ln = net_generic(net, lockd_net_id); @@ -237,7 +236,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, mutex_lock(&nlm_host_mutex); chain = &nlm_client_hosts[nlm_hash_address(sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) @@ -322,7 +321,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const size_t hostname_len) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host = NULL; struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); @@ -350,7 +348,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, nlm_gc_hosts(net); chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) @@ -515,10 +513,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, { struct nlm_host *host; struct hlist_head *chain; - struct hlist_node *pos; mutex_lock(&nlm_host_mutex); - for_each_host(host, pos, chain, cache) { + for_each_host(host, chain, cache) { if (host->h_nsmhandle == nsm && host->h_nsmstate != info->state) { host->h_nsmstate = info->state; @@ -570,7 +567,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info) static void nlm_complain_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; if (net) { @@ -587,7 +583,7 @@ static void nlm_complain_hosts(struct net *net) dprintk("lockd: %lu hosts left:\n", nrhosts); } - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; dprintk(" %s (cnt %d use %d exp %ld net %p)\n", @@ -600,14 +596,13 @@ void nlm_shutdown_hosts_net(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts in net %p...\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_expires = jiffies - 1; @@ -644,11 +639,11 @@ static void nlm_gc_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_host *host; dprintk("lockd: host garbage collection for net %p\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_inuse = 0; @@ -657,7 +652,7 @@ nlm_gc_hosts(struct net *net) /* Mark all hosts that hold locks, blocks or shares */ nlmsvc_mark_resources(net); - for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { + for_each_host_safe(host, next, chain, nlm_server_hosts) { if (net && host->net != net) continue; if (atomic_read(&host->h_count) || host->h_inuse diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b3a24b07d981..d17bb62b06d6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -84,7 +84,6 @@ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, struct nfs_fh *f) { - struct hlist_node *pos; struct nlm_file *file; unsigned int hash; __be32 nfserr; @@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, /* Lock file table */ mutex_lock(&nlm_file_mutex); - hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) + hlist_for_each_entry(file, &nlm_files[hash], f_list) if (!nfs_compare_fh(&file->f_handle, f)) goto found; @@ -248,13 +247,13 @@ static int nlm_traverse_files(void *data, nlm_host_match_fn_t match, int (*is_failover_file)(void *data, struct nlm_file *file)) { - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_file *file; int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { - hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { + hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) { if (is_failover_file && !is_failover_file(data, file)) continue; file->f_count++; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index d35b62e83ea6..6da209bd9408 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->ld == ld && d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { if (atomic_read(&d->ref)) @@ -248,12 +247,11 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; HLIST_HEAD(tmp); spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); @@ -291,12 +289,11 @@ void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) { struct nfs4_deviceid_node *d; - struct hlist_node *n; int i; rcu_read_lock(); for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node) if (d->nfs_client == clp) set_bit(NFS_DEVICEID_INVALID, &d->flags); } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34a55da..da3dbd0f8979 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -120,7 +120,6 @@ hash_refile(struct svc_cacherep *rp) int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; struct hlist_head *rh; struct svc_cacherep *rp; __be32 xid = rqstp->rq_xid; @@ -141,7 +140,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rtn = RC_DOIT; rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { + hlist_for_each_entry(rp, rh, c_hash) { if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && proto == rp->c_prot && vers == rp->c_vers && diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 6baadb5a8430..4bb21d67d9b1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; - struct hlist_node *p; int watched; if (!S_ISDIR(inode->i_mode)) @@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index f31e90fc050d..74825be65b7b 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -36,12 +36,11 @@ static void fsnotify_recalc_inode_mask_locked(struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) new_mask |= mark->mask; inode->i_fsnotify_mask = new_mask; } @@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) void fsnotify_clear_marks_by_inode(struct inode *inode) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; LIST_HEAD(free_list); spin_lock(&inode->i_lock); - hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) { list_add(&mark->i.free_i_list, &free_list); hlist_del_init_rcu(&mark->i.i_list); fsnotify_get_mark(mark); @@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked( struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, int allow_dups) { - struct fsnotify_mark *lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { - last = node; + hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->i.i_list); + hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); out: fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 4df58b8ea64a..68ca5a8704b5 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -33,12 +33,12 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; struct mount *m = real_mount(mnt); LIST_HEAD(free_list); spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) { list_add(&mark->m.free_m_list, &free_list); hlist_del_init_rcu(&mark->m.m_list); fsnotify_get_mark(mark); @@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) + hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) new_mask |= mark->mask; m->mnt_fsnotify_mask = new_mask; } @@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_ { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, int allow_dups) { struct mount *m = real_mount(mnt); - struct fsnotify_mark *lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) { - last = node; + hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->m.m_list); + hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); out: fsnotify_recalc_vfsmount_mask_locked(mnt); spin_unlock(&mnt->mnt_root->d_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 8db4b58b2e4b..ef999729e274 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { - struct hlist_node *p; struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 01ebfd0bdad7..eeac97bb3bfa 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, u8 dead_node, u8 new_master) { int i; - struct hlist_node *hash_iter; struct hlist_head *bucket; struct dlm_lock_resource *res, *next; @@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, * if necessary */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, hash_iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { if (!(res->state & DLM_LOCK_RES_RECOVERING)) continue; @@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) { - struct hlist_node *iter; struct dlm_lock_resource *res; int i; struct hlist_head *bucket; @@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { /* always prune any $RECOVERY entries for dead nodes, * otherwise hangs can occur during later recovery */ if (dlm_is_recovery_lock(res->lockname.name, diff --git a/fs/super.c b/fs/super.c index df6c2f4c6b59..7465d4364208 100644 --- a/fs/super.c +++ b/fs/super.c @@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type, void *data) { struct super_block *s = NULL; - struct hlist_node *node; struct super_block *old; int err; retry: spin_lock(&sb_lock); if (test) { - hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (!grab_super(old)) @@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type, void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; - struct hlist_node *node; spin_lock(&sb_lock); - hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(sb, &type->fs_supers, s_instances) { sb->s_count++; spin_unlock(&sb_lock); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 2ce9a5db6ab5..15c68f9489ae 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -461,14 +461,13 @@ const struct file_operations bin_fops = { void unmap_bin_file(struct sysfs_dirent *attr_sd) { struct bin_buffer *bb; - struct hlist_node *tmp; if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) return; mutex_lock(&sysfs_bin_lock); - hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { + hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { struct inode *inode = file_inode(bb->file); unmap_mapping_range(inode->i_mapping, 0, 0, 1); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 96fcbb85ff83..d1dba7ce75ae 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1442,9 +1442,8 @@ xlog_recover_find_tid( xlog_tid_t tid) { xlog_recover_t *trans; - struct hlist_node *n; - hlist_for_each_entry(trans, n, head, r_list) { + hlist_for_each_entry(trans, head, r_list) { if (trans->r_log_tid == tid) return trans; } diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 227c62424f3c..a9df51f5d54c 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -115,51 +115,50 @@ static inline void hash_del_rcu(struct hlist_node *node) * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry(obj, node, &name[bkt], member) +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_rcu(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_rcu(obj, node, &name[bkt], member) +#define hash_for_each_rcu(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member) +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible(name, obj, node, member, key) \ - hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the @@ -167,25 +166,24 @@ static inline void hash_del_rcu(struct hlist_node *node) * in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_rcu(name, obj, node, member, key) \ - hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ - hlist_for_each_entry_safe(obj, node, tmp, \ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 4648d8021244..cfd21e3d5506 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -216,11 +216,10 @@ static inline struct hlist_head *team_port_index_hash(struct team *team, static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, p, head, hlist) + hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; @@ -228,11 +227,10 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry_rcu(port, p, head, hlist) + hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; diff --git a/include/linux/list.h b/include/linux/list.h index cc6d2aa6b415..d991cc147c98 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -666,54 +666,49 @@ static inline void hlist_move_list(struct hlist_head *old, for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ pos = n) +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + /** * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_continue(pos, member) \ + for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_from(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) #endif diff --git a/include/linux/pid.h b/include/linux/pid.h index 2381c973d897..a089a3c447fc 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -176,9 +176,8 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ - struct hlist_node *pos___; \ if ((pid) != NULL) \ - hlist_for_each_entry_rcu((task), pos___, \ + hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pids[type].node) { /* diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c92dd28eaa6c..8089e35d47ac 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -445,8 +445,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, /** * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -454,16 +453,16 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(hlist_next_rcu(pos))) +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -471,35 +470,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ - for (pos = rcu_dereference_bh((head)->first); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_rcu_bh(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ - for (pos = rcu_dereference((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(pos->next)) +#define hlist_for_each_entry_continue_rcu(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ - for (pos = rcu_dereference_bh((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_continue_rcu_bh(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ diff --git a/include/net/ax25.h b/include/net/ax25.h index 53539acbd81a..89ed9ac5701f 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -161,8 +161,8 @@ typedef struct ax25_uid_assoc { ax25_address call; } ax25_uid_assoc; -#define ax25_uid_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, uid_node) +#define ax25_uid_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ atomic_inc(&((ax25)->refcount)) @@ -247,8 +247,8 @@ typedef struct ax25_cb { #define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo) -#define ax25_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, ax25_node) +#define ax25_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ atomic_inc(&((__ax25)->refcount)) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7b2ae9d37076..ef83d9e844b5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -94,8 +94,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib) return read_pnet(&ib->ib_net); } -#define inet_bind_bucket_for_each(tb, pos, head) \ - hlist_for_each_entry(tb, pos, head, node) +#define inet_bind_bucket_for_each(tb, head) \ + hlist_for_each_entry(tb, head, node) struct inet_bind_hashbucket { spinlock_t lock; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 7d658d577368..f908dfc06505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -178,11 +178,11 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) #define inet_twsk_for_each(tw, node, head) \ hlist_nulls_for_each_entry(tw, node, head, tw_node) -#define inet_twsk_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) +#define inet_twsk_for_each_inmate(tw, jail) \ + hlist_for_each_entry(tw, jail, tw_death_node) -#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) +#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \ + hlist_for_each_entry_safe(tw, safe, jail, tw_death_node) static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { diff --git a/include/net/netrom.h b/include/net/netrom.h index f0793c1cb5f8..121dcf854db5 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -154,17 +154,17 @@ static __inline__ void nr_node_unlock(struct nr_node *nr_node) nr_node_put(nr_node); } -#define nr_neigh_for_each(__nr_neigh, node, list) \ - hlist_for_each_entry(__nr_neigh, node, list, neigh_node) +#define nr_neigh_for_each(__nr_neigh, list) \ + hlist_for_each_entry(__nr_neigh, list, neigh_node) -#define nr_neigh_for_each_safe(__nr_neigh, node, node2, list) \ - hlist_for_each_entry_safe(__nr_neigh, node, node2, list, neigh_node) +#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \ + hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node) -#define nr_node_for_each(__nr_node, node, list) \ - hlist_for_each_entry(__nr_node, node, list, node_node) +#define nr_node_for_each(__nr_node, list) \ + hlist_for_each_entry(__nr_node, list, node_node) -#define nr_node_for_each_safe(__nr_node, node, node2, list) \ - hlist_for_each_entry_safe(__nr_node, node, node2, list, node_node) +#define nr_node_for_each_safe(__nr_node, node2, list) \ + hlist_for_each_entry_safe(__nr_node, node2, list, node_node) /*********************************************************************/ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2761c905504e..f10818fc8804 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -339,11 +339,10 @@ static inline struct Qdisc_class_common * qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) { struct Qdisc_class_common *cl; - struct hlist_node *n; unsigned int h; h = qdisc_class_hash(id, hash->hashmask); - hlist_for_each_entry(cl, n, &hash->hash[h], hnode) { + hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) return cl; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7fdf298a47ef..df85a0c0f2d5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -675,8 +675,8 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) return h & (sctp_assoc_hashsize - 1); } -#define sctp_for_each_hentry(epb, node, head) \ - hlist_for_each_entry(epb, node, head, node) +#define sctp_for_each_hentry(epb, head) \ + hlist_for_each_entry(epb, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sock.h b/include/net/sock.h index a66caa223d18..14f6e9d19dc7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -606,24 +606,23 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -#define sk_for_each(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_node) +#define sk_for_each_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) -#define sk_for_each_from(__sk, node) \ - if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ - hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_for_each_from(__sk) \ + hlist_for_each_entry_from(__sk, sk_node) #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) -#define sk_for_each_safe(__sk, node, tmp, list) \ - hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) -#define sk_for_each_bound(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_bind_node) +#define sk_for_each_safe(__sk, tmp, list) \ + hlist_for_each_entry_safe(__sk, tmp, list, sk_node) +#define sk_for_each_bound(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind_node) static inline struct user_namespace *sk_user_ns(struct sock *sk) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 40e0df6c2a2f..a32f9432666c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -554,7 +554,6 @@ static struct css_set *find_existing_css_set( { int i; struct cgroupfs_root *root = cgrp->root; - struct hlist_node *node; struct css_set *cg; unsigned long key; @@ -577,7 +576,7 @@ static struct css_set *find_existing_css_set( } key = css_set_hash(template); - hash_for_each_possible(css_set_table, cg, node, hlist, key) { + hash_for_each_possible(css_set_table, cg, hlist, key) { if (!compare_css_sets(cg, oldcg, cgrp, template)) continue; @@ -1611,7 +1610,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct cgroupfs_root *existing_root; const struct cred *cred; int i; - struct hlist_node *node; struct css_set *cg; BUG_ON(sb->s_root != NULL); @@ -1666,7 +1664,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, /* Link the top cgroup in this hierarchy into all * the css_set objects */ write_lock(&css_set_lock); - hash_for_each(css_set_table, i, node, cg, hlist) + hash_for_each(css_set_table, i, cg, hlist) link_css_set(&tmp_cg_links, cg, root_cgrp); write_unlock(&css_set_lock); @@ -4493,7 +4491,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; int i, ret; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct css_set *cg; unsigned long key; @@ -4561,7 +4559,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * this is all done under the css_set_lock. */ write_lock(&css_set_lock); - hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) { + hash_for_each_safe(css_set_table, i, tmp, cg, hlist) { /* skip entries that we already rehashed */ if (cg->subsys[ss->subsys_id]) continue; @@ -4571,7 +4569,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) cg->subsys[ss->subsys_id] = css; /* recompute hash and restore entry */ key = css_set_hash(cg->subsys); - hash_add(css_set_table, node, key); + hash_add(css_set_table, &cg->hlist, key); } write_unlock(&css_set_lock); diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a92cf6beff0..b0cd86501c30 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5126,7 +5126,6 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, { struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; - struct hlist_node *node; struct hlist_head *head; rcu_read_lock(); @@ -5134,7 +5133,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (!head) goto end; - hlist_for_each_entry_rcu(event, node, head, hlist_entry) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_event(event, nr, data, regs); } @@ -5419,7 +5418,6 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, { struct perf_sample_data data; struct perf_event *event; - struct hlist_node *node; struct perf_raw_record raw = { .size = entry_size, @@ -5429,7 +5427,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, perf_sample_data_init(&data, addr, 0); data.raw = &raw; - hlist_for_each_entry_rcu(event, node, head, hlist_entry) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 550294d58a02..e35be53f6613 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -334,11 +334,10 @@ static inline void reset_kprobe_instance(void) struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (p->addr == addr) return p; } @@ -799,7 +798,6 @@ out: static void __kprobes optimize_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -810,7 +808,7 @@ static void __kprobes optimize_all_kprobes(void) kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (!kprobe_disabled(p)) optimize_kprobe(p); } @@ -821,7 +819,6 @@ static void __kprobes optimize_all_kprobes(void) static void __kprobes unoptimize_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -832,7 +829,7 @@ static void __kprobes unoptimize_all_kprobes(void) kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (!kprobe_disabled(p)) unoptimize_kprobe(p, false); } @@ -1148,7 +1145,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long hash, flags = 0; if (unlikely(!kprobes_initialized)) @@ -1159,12 +1156,12 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; kretprobe_table_lock(hash, &flags); - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri, &empty_rp); } kretprobe_table_unlock(hash, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -1173,9 +1170,9 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) static inline void free_rp_inst(struct kretprobe *rp) { struct kretprobe_instance *ri; - struct hlist_node *pos, *next; + struct hlist_node *next; - hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { + hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -1185,14 +1182,14 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp) { unsigned long flags, hash; struct kretprobe_instance *ri; - struct hlist_node *pos, *next; + struct hlist_node *next; struct hlist_head *head; /* No race here */ for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { kretprobe_table_lock(hash, &flags); head = &kretprobe_inst_table[hash]; - hlist_for_each_entry_safe(ri, pos, next, head, hlist) { + hlist_for_each_entry_safe(ri, next, head, hlist) { if (ri->rp == rp) ri->rp = NULL; } @@ -2028,7 +2025,6 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, { struct module *mod = data; struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; int checkcore = (val == MODULE_STATE_GOING); @@ -2045,7 +2041,7 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2192,7 +2188,6 @@ static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p, *kp; const char *sym = NULL; unsigned int i = *(loff_t *) v; @@ -2201,7 +2196,7 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) head = &kprobe_table[i]; preempt_disable(); - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { sym = kallsyms_lookup((unsigned long)p->addr, NULL, &offset, &modname, namebuf); if (kprobe_aggrprobe(p)) { @@ -2236,7 +2231,6 @@ static const struct file_operations debugfs_kprobes_operations = { static void __kprobes arm_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -2249,7 +2243,7 @@ static void __kprobes arm_all_kprobes(void) /* Arming kprobes doesn't optimize kprobe itself */ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (!kprobe_disabled(p)) arm_kprobe(p); } @@ -2265,7 +2259,6 @@ already_enabled: static void __kprobes disarm_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -2282,7 +2275,7 @@ static void __kprobes disarm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) disarm_kprobe(p, false); } diff --git a/kernel/pid.c b/kernel/pid.c index f2c6a6825098..047dc6264638 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -350,10 +350,9 @@ void disable_pid_allocation(struct pid_namespace *ns) struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { - struct hlist_node *elem; struct upid *pnr; - hlist_for_each_entry_rcu(pnr, elem, + hlist_for_each_entry_rcu(pnr, &pid_hash[pid_hashfn(nr, ns)], pid_chain) if (pnr->nr == nr && pnr->ns == ns) return container_of(pnr, struct pid, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2b5243176aba..12af4270c9c1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1752,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister); static void fire_sched_in_preempt_notifiers(struct task_struct *curr) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_in(notifier, raw_smp_processor_id()); } @@ -1763,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, struct task_struct *next) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_out(notifier, next); } diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d4abac261779..b9bde5727829 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data) continue; } - BUG_ON(td->cpu != smp_processor_id()); + //BUG_ON(td->cpu != smp_processor_id()); /* Check for state change setup */ switch (td->status) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98ca94a41819..ab25b88aae56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -762,7 +762,6 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec; struct hlist_head *hhd; - struct hlist_node *n; unsigned long key; key = hash_long(ip, ftrace_profile_bits); @@ -771,7 +770,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) if (hlist_empty(hhd)) return NULL; - hlist_for_each_entry_rcu(rec, n, hhd, node) { + hlist_for_each_entry_rcu(rec, hhd, node) { if (rec->ip == ip) return rec; } @@ -1133,7 +1132,6 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) unsigned long key; struct ftrace_func_entry *entry; struct hlist_head *hhd; - struct hlist_node *n; if (ftrace_hash_empty(hash)) return NULL; @@ -1145,7 +1143,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) hhd = &hash->buckets[key]; - hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + hlist_for_each_entry_rcu(entry, hhd, hlist) { if (entry->ip == ip) return entry; } @@ -1202,7 +1200,7 @@ remove_hash_entry(struct ftrace_hash *hash, static void ftrace_hash_clear(struct ftrace_hash *hash) { struct hlist_head *hhd; - struct hlist_node *tp, *tn; + struct hlist_node *tn; struct ftrace_func_entry *entry; int size = 1 << hash->size_bits; int i; @@ -1212,7 +1210,7 @@ static void ftrace_hash_clear(struct ftrace_hash *hash) for (i = 0; i < size; i++) { hhd = &hash->buckets[i]; - hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + hlist_for_each_entry_safe(entry, tn, hhd, hlist) free_hash_entry(hash, entry); } FTRACE_WARN_ON(hash->count); @@ -1275,7 +1273,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; - struct hlist_node *tp; int size; int ret; int i; @@ -1290,7 +1287,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) size = 1 << hash->size_bits; for (i = 0; i < size; i++) { - hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ret = add_hash_entry(new_hash, entry->ip); if (ret < 0) goto free_hash; @@ -1316,7 +1313,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src) { struct ftrace_func_entry *entry; - struct hlist_node *tp, *tn; + struct hlist_node *tn; struct hlist_head *hhd; struct ftrace_hash *old_hash; struct ftrace_hash *new_hash; @@ -1362,7 +1359,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, size = 1 << src->size_bits; for (i = 0; i < size; i++) { hhd = &src->buckets[i]; - hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) { + hlist_for_each_entry_safe(entry, tn, hhd, hlist) { if (bits > 0) key = hash_long(entry->ip, bits); else @@ -2901,7 +2898,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, { struct ftrace_func_probe *entry; struct hlist_head *hhd; - struct hlist_node *n; unsigned long key; key = hash_long(ip, FTRACE_HASH_BITS); @@ -2917,7 +2913,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, * on the hash. rcu_read_lock is too dangerous here. */ preempt_disable_notrace(); - hlist_for_each_entry_rcu(entry, n, hhd, node) { + hlist_for_each_entry_rcu(entry, hhd, node) { if (entry->ip == ip) entry->ops->func(ip, parent_ip, &entry->data); } @@ -3068,7 +3064,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data, int flags) { struct ftrace_func_probe *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; char str[KSYM_SYMBOL_LEN]; int type = MATCH_FULL; int i, len = 0; @@ -3091,7 +3087,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { struct hlist_head *hhd = &ftrace_func_hash[i]; - hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { + hlist_for_each_entry_safe(entry, tmp, hhd, node) { /* break up if statements for readability */ if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 194d79602dc7..697e88d13907 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -739,12 +739,11 @@ static int task_state_char(unsigned long state) struct trace_event *ftrace_find_event(int type) { struct trace_event *event; - struct hlist_node *n; unsigned key; key = type & (EVENT_HASHSIZE - 1); - hlist_for_each_entry(event, n, &event_hash[key], node) { + hlist_for_each_entry(event, &event_hash[key], node) { if (event->type == type) return event; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index d96ba22dabfa..0c05a4592047 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -192,12 +192,11 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, static struct tracepoint_entry *get_tracepoint(const char *name) { struct hlist_head *head; - struct hlist_node *node; struct tracepoint_entry *e; u32 hash = jhash(name, strlen(name), 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { + hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) return e; } @@ -211,13 +210,12 @@ static struct tracepoint_entry *get_tracepoint(const char *name) static struct tracepoint_entry *add_tracepoint(const char *name) { struct hlist_head *head; - struct hlist_node *node; struct tracepoint_entry *e; size_t name_len = strlen(name) + 1; u32 hash = jhash(name, name_len-1, 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { + hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) { printk(KERN_NOTICE "tracepoint %s busy\n", name); diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 1744bb80f1fb..394f70b17162 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -34,11 +34,11 @@ EXPORT_SYMBOL_GPL(user_return_notifier_unregister); void fire_user_return_notifiers(void) { struct user_return_notifier *urn; - struct hlist_node *tmp1, *tmp2; + struct hlist_node *tmp2; struct hlist_head *head; head = &get_cpu_var(return_notifier_list); - hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) + hlist_for_each_entry_safe(urn, tmp2, head, link) urn->on_user_return(urn); put_cpu_var(return_notifier_list); } diff --git a/kernel/user.c b/kernel/user.c index 57ebfd42023c..e81978e8c03b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -105,9 +105,8 @@ static void uid_hash_remove(struct user_struct *up) static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) { struct user_struct *user; - struct hlist_node *h; - hlist_for_each_entry(user, h, hashent, uidhash_node) { + hlist_for_each_entry(user, hashent, uidhash_node) { if (uid_eq(user->uid, uid)) { atomic_inc(&user->__count); return user; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f4feacad3812..81f2457811eb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -251,8 +251,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); for ((pool) = &std_worker_pools(cpu)[0]; \ (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) -#define for_each_busy_worker(worker, i, pos, pool) \ - hash_for_each(pool->busy_hash, i, pos, worker, hentry) +#define for_each_busy_worker(worker, i, pool) \ + hash_for_each(pool->busy_hash, i, worker, hentry) static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, unsigned int sw) @@ -909,9 +909,8 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, struct work_struct *work) { struct worker *worker; - struct hlist_node *tmp; - hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, + hash_for_each_possible(pool->busy_hash, worker, hentry, (unsigned long)work) if (worker->current_work == work && worker->current_func == work->func) @@ -1626,7 +1625,6 @@ static void busy_worker_rebind_fn(struct work_struct *work) static void rebind_workers(struct worker_pool *pool) { struct worker *worker, *n; - struct hlist_node *pos; int i; lockdep_assert_held(&pool->assoc_mutex); @@ -1648,7 +1646,7 @@ static void rebind_workers(struct worker_pool *pool) } /* rebind busy workers */ - for_each_busy_worker(worker, i, pos, pool) { + for_each_busy_worker(worker, i, pool) { struct work_struct *rebind_work = &worker->rebind_work; struct workqueue_struct *wq; @@ -3423,7 +3421,6 @@ static void wq_unbind_fn(struct work_struct *work) int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; - struct hlist_node *pos; int i; for_each_std_worker_pool(pool, cpu) { @@ -3442,7 +3439,7 @@ static void wq_unbind_fn(struct work_struct *work) list_for_each_entry(worker, &pool->idle_list, entry) worker->flags |= WORKER_UNBOUND; - for_each_busy_worker(worker, i, pos, pool) + for_each_busy_worker(worker, i, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; diff --git a/lib/debugobjects.c b/lib/debugobjects.c index d11808ca4bc4..37061ede8b81 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -109,11 +109,10 @@ static void fill_pool(void) */ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) { - struct hlist_node *node; struct debug_obj *obj; int cnt = 0; - hlist_for_each_entry(obj, node, &b->list, node) { + hlist_for_each_entry(obj, &b->list, node) { cnt++; if (obj->object == addr) return obj; @@ -213,7 +212,7 @@ static void free_object(struct debug_obj *obj) static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj *obj; unsigned long flags; @@ -227,7 +226,7 @@ static void debug_objects_oom(void) raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -658,7 +657,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr, static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj_descr *descr; enum debug_obj_state state; @@ -678,7 +677,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) repeat: cnt = 0; raw_spin_lock_irqsave(&db->lock, flags); - hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + hlist_for_each_entry_safe(obj, tmp, &db->list, node) { cnt++; oaddr = (unsigned long) obj->object; if (oaddr < saddr || oaddr >= eaddr) @@ -702,7 +701,7 @@ repeat: raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -1013,7 +1012,7 @@ void __init debug_objects_early_init(void) static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct debug_obj *obj, *new; HLIST_HEAD(objects); int i, cnt = 0; @@ -1033,7 +1032,7 @@ static int __init debug_objects_replace_static_objects(void) local_irq_disable(); /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) + hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) hlist_del(&obj->node); /* Move the allocated objects to the pool */ hlist_move_list(&objects, &obj_pool); @@ -1042,7 +1041,7 @@ static int __init debug_objects_replace_static_objects(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); - hlist_for_each_entry(obj, node, &objects, node) { + hlist_for_each_entry(obj, &objects, node) { new = hlist_entry(obj_pool.first, typeof(*obj), node); hlist_del(&new->node); /* copy object data */ @@ -1057,7 +1056,7 @@ static int __init debug_objects_replace_static_objects(void) obj_pool_used); return 0; free: - hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { + hlist_for_each_entry_safe(obj, tmp, &objects, node) { hlist_del(&obj->node); kmem_cache_free(obj_cache, obj); } diff --git a/lib/lru_cache.c b/lib/lru_cache.c index d71d89498943..8335d39d2ccd 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -262,12 +262,11 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, bool include_changing) { - struct hlist_node *n; struct lc_element *e; BUG_ON(!lc); BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { /* "about to be changed" elements, pending transaction commit, * are hashed by their "new number". "Normal" elements have * lc_number == lc_new_number. */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bfa142e67b1c..e2f7f5aaaafb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1906,9 +1906,8 @@ static inline void free_mm_slot(struct mm_slot *mm_slot) static struct mm_slot *get_mm_slot(struct mm_struct *mm) { struct mm_slot *mm_slot; - struct hlist_node *node; - hash_for_each_possible(mm_slots_hash, mm_slot, node, hash, (unsigned long)mm) + hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm) if (mm == mm_slot->mm) return mm_slot; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 83dd5fbf5e60..c8d7f3110fd0 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -436,7 +436,7 @@ static int get_object(struct kmemleak_object *object) */ static void free_object_rcu(struct rcu_head *rcu) { - struct hlist_node *elem, *tmp; + struct hlist_node *tmp; struct kmemleak_scan_area *area; struct kmemleak_object *object = container_of(rcu, struct kmemleak_object, rcu); @@ -445,8 +445,8 @@ static void free_object_rcu(struct rcu_head *rcu) * Once use_count is 0 (guaranteed by put_object), there is no other * code accessing this object, hence no need for locking. */ - hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) { - hlist_del(elem); + hlist_for_each_entry_safe(area, tmp, &object->area_list, node) { + hlist_del(&area->node); kmem_cache_free(scan_area_cache, area); } kmem_cache_free(object_cache, object); @@ -1177,7 +1177,6 @@ static void scan_block(void *_start, void *_end, static void scan_object(struct kmemleak_object *object) { struct kmemleak_scan_area *area; - struct hlist_node *elem; unsigned long flags; /* @@ -1205,7 +1204,7 @@ static void scan_object(struct kmemleak_object *object) spin_lock_irqsave(&object->lock, flags); } } else - hlist_for_each_entry(area, elem, &object->area_list, node) + hlist_for_each_entry(area, &object->area_list, node) scan_block((void *)area->start, (void *)(area->start + area->size), object, 0); diff --git a/mm/ksm.c b/mm/ksm.c index ab2ba9ad3c59..85bfd4c16346 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -320,10 +320,9 @@ static inline void free_mm_slot(struct mm_slot *mm_slot) static struct mm_slot *get_mm_slot(struct mm_struct *mm) { - struct hlist_node *node; struct mm_slot *slot; - hash_for_each_possible(mm_slots_hash, slot, node, link, (unsigned long)mm) + hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm) if (slot->mm == mm) return slot; @@ -496,9 +495,8 @@ static inline int get_kpfn_nid(unsigned long kpfn) static void remove_node_from_stable_tree(struct stable_node *stable_node) { struct rmap_item *rmap_item; - struct hlist_node *hlist; - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) ksm_pages_sharing--; else @@ -1898,7 +1896,6 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, { struct stable_node *stable_node; struct rmap_item *rmap_item; - struct hlist_node *hlist; unsigned int mapcount = page_mapcount(page); int referenced = 0; int search_new_forks = 0; @@ -1910,7 +1907,7 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, if (!stable_node) return 0; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; @@ -1952,7 +1949,6 @@ out: int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) { struct stable_node *stable_node; - struct hlist_node *hlist; struct rmap_item *rmap_item; int ret = SWAP_AGAIN; int search_new_forks = 0; @@ -1964,7 +1960,7 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) if (!stable_node) return SWAP_FAIL; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; @@ -2005,7 +2001,6 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, struct vm_area_struct *, unsigned long, void *), void *arg) { struct stable_node *stable_node; - struct hlist_node *hlist; struct rmap_item *rmap_item; int ret = SWAP_AGAIN; int search_new_forks = 0; @@ -2017,7 +2012,7 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, if (!stable_node) return ret; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 2175fb0d501c..be04122fb277 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -95,11 +95,10 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int young = 0, id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->clear_flush_young) young |= mn->ops->clear_flush_young(mn, mm, address); } @@ -112,11 +111,10 @@ int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int young = 0, id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->test_young) { young = mn->ops->test_young(mn, mm, address); if (young) @@ -132,11 +130,10 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->change_pte) mn->ops->change_pte(mn, mm, address, pte); } @@ -147,11 +144,10 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_page) mn->ops->invalidate_page(mn, mm, address); } @@ -162,11 +158,10 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_range_start) mn->ops->invalidate_range_start(mn, mm, start, end); } @@ -178,11 +173,10 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_range_end) mn->ops->invalidate_range_end(mn, mm, start, end); } diff --git a/net/9p/error.c b/net/9p/error.c index 2ab2de76010f..126fd0dceea2 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de2e950a0a7a..74dea377fe5b 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -655,7 +655,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, - .zc_request = p9_virtio_zc_request, + //.zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, /* * We leave one entry for input and one entry for response diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 33475291c9c1..4a141e3cf076 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { struct sock *s; - struct hlist_node *node; read_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (to->sat_port != at->src_port) @@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk, struct sockaddr_at *sat) { struct sock *s; - struct hlist_node *node; struct atalk_sock *at; write_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1084,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat) sat->sat_port < ATPORT_LAST; sat->sat_port++) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && diff --git a/net/atm/common.c b/net/atm/common.c index 806fc0a40051..7b491006eaf4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -270,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev) write_lock_irq(&vcc_sklist_lock); for (i = 0; i < VCC_HTABLE_SIZE; i++) { struct hlist_head *head = &vcc_hash[i]; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct sock *s; struct atm_vcc *vcc; - sk_for_each_safe(s, node, tmp, head) { + sk_for_each_safe(s, tmp, head) { vcc = atm_sk(s); if (vcc->dev == dev) { vcc_release_async(vcc, -EPIPE); @@ -317,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; - struct hlist_node *node; struct sock *s; struct atm_vcc *walk; - sk_for_each(s, node, head) { + sk_for_each(s, head) { walk = atm_sk(s); if (walk->dev != vcc->dev) continue; diff --git a/net/atm/lec.c b/net/atm/lec.c index 2e3d942e77f1..f23916be18fb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -842,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, --*l; } - hlist_for_each_entry_from(tmp, e, next) { + tmp = container_of(e, struct lec_arp_table, next); + + hlist_for_each_entry_from(tmp, next) { if (--*l < 0) break; } @@ -1307,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - struct hlist_node *node; struct lec_arp_table *entry; int i, remove_vcc = 1; @@ -1326,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp(to_remove->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { @@ -1364,14 +1365,13 @@ static const char *get_status_string(unsigned char st) static void dump_arp_table(struct lec_priv *priv) { - struct hlist_node *node; struct lec_arp_table *rulla; char buf[256]; int i, j, offset; pr_info("Dump %p:\n", priv); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, node, + hlist_for_each_entry(rulla, &priv->lec_arp_tables[i], next) { offset = 0; offset += sprintf(buf, "%d: %p\n", i, rulla); @@ -1403,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_no_forward)) pr_info("No forward\n"); - hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + hlist_for_each_entry(rulla, &priv->lec_no_forward, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1428,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_arp_empty_ones)) pr_info("Empty ones\n"); - hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1453,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->mcast_fwds)) pr_info("Multicast Forward VCCs\n"); - hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + hlist_for_each_entry(rulla, &priv->mcast_fwds, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1487,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv) static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -1499,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { lec_arp_remove(priv, entry); lec_arp_put(entry); @@ -1507,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv) INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1516,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1525,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_no_forward); - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1542,14 +1542,13 @@ static void lec_arp_destroy(struct lec_priv *priv) static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr) { - struct hlist_node *node; struct hlist_head *head; struct lec_arp_table *entry; pr_debug("%pM\n", mac_addr); head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; - hlist_for_each_entry(entry, node, head, next) { + hlist_for_each_entry(entry, head, next) { if (ether_addr_equal(mac_addr, entry->mac_addr)) return entry; } @@ -1686,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long flags; struct lec_priv *priv = container_of(work, struct lec_priv, lec_arp_work.work); - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; unsigned long now; int i; @@ -1696,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (__lec_arp_check_expire(entry, now, priv)) { struct sk_buff *skb; @@ -1823,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || @@ -1855,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, unsigned int targetless_le_arp) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; int i; @@ -1870,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, * we have no entry in the cache. 7.1.30 */ if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); @@ -1915,7 +1914,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, node, + hlist_for_each_entry(tmp, &priv->lec_arp_tables[i], next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -1956,7 +1955,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i, found_entry = 0; @@ -2026,7 +2024,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16], ioc_data->atm_addr[17], ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp (ioc_data->atm_addr, entry->atm_addr, @@ -2103,7 +2101,6 @@ out: static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; @@ -2111,7 +2108,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (entry->flush_tran_id == tran_id && entry->status == ESI_FLUSH_PENDING) { @@ -2140,13 +2137,12 @@ lec_set_flush_tran_id(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; @@ -2198,7 +2194,7 @@ out: static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -2208,7 +2204,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (vcc == entry->vcc) { lec_arp_remove(priv, entry); @@ -2219,7 +2215,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2229,7 +2225,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2239,7 +2235,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ @@ -2257,13 +2253,13 @@ lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; unsigned char *src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 86767ca908a3..4176887e72eb 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc) static void sigd_close(struct atm_vcc *vcc) { - struct hlist_node *node; struct sock *s; int i; @@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) for (i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); purge_vcc(vcc); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 69a06c47b648..7b11f8bc5071 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -81,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; - struct hlist_node *node; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; spin_lock_bh(&ax25_list_lock); again: - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); @@ -158,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, struct net_device *dev, int type) { ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; if (s->sk && !ax25cmp(&s->source_addr, addr) && @@ -187,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, { struct sock *sk = NULL; ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && s->sk->sk_type == type) { @@ -213,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; - struct hlist_node *node; spin_lock_bh(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && s->sk->sk_type != SOCK_SEQPACKET) continue; if (s->ax25_dev == NULL) @@ -248,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) { ax25_cb *s; struct sk_buff *copy; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && s->sk->sk_protocol == proto && diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 5ea7fd3e2af9..e05bd57b5afd 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -39,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25) void ax25_ds_enquiry_response(ax25_cb *ax25) { ax25_cb *ax25o; - struct hlist_node *node; /* Please note that neither DK4EG's nor DG2FEF's * DAMA spec mention the following behaviour as seen @@ -80,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_ds_set_timer(ax25->ax25_dev); spin_lock(&ax25_list_lock); - ax25_for_each(ax25o, node, &ax25_list) { + ax25_for_each(ax25o, &ax25_list) { if (ax25o == ax25) continue; @@ -159,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) { ax25_cb *ax25; int res = 0; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) + ax25_for_each(ax25, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 993c439b4f71..951cd57bb07d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -70,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg) { ax25_dev *ax25_dev = (struct ax25_dev *) arg; ax25_cb *ax25; - struct hlist_node *node; if (ax25_dev == NULL || !ax25_dev->dama.slave) return; /* Yikes! */ @@ -81,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg) } spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) { + ax25_for_each(ax25, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 7d5f24b82cc8..7f16e8a931b2 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -193,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { struct ax25_linkfail *lf; - struct hlist_node *node; spin_lock_bh(&linkfail_lock); - hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node) lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 957999e43ff7..71c4badbc807 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -54,10 +54,9 @@ EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; - struct hlist_node *node; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; @@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; ax25_uid_assoc *user; unsigned long res; @@ -82,7 +80,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; @@ -126,7 +124,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) ax25_uid = NULL; write_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } @@ -212,11 +210,10 @@ const struct file_operations ax25_uid_fops = { void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; write_lock(&ax25_uid_lock); again: - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 72fe1bbf7721..a0b253ecadaf 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -487,7 +487,6 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, */ struct batadv_forw_packet *forw_packet_aggr = NULL; struct batadv_forw_packet *forw_packet_pos = NULL; - struct hlist_node *tmp_node; struct batadv_ogm_packet *batadv_ogm_packet; bool direct_link; unsigned long max_aggregation_jiffies; @@ -500,7 +499,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bat_list_lock); /* own packets are not to be aggregated */ if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, + hlist_for_each_entry(forw_packet_pos, &bat_priv->forw_bat_list, list) { if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet, bat_priv, packet_len, @@ -655,7 +654,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - struct hlist_node *node; int if_num; uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; @@ -665,7 +663,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && @@ -801,7 +799,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; @@ -810,7 +807,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, /* find corresponding one hop neighbor */ rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_neigh_node->neigh_list, list) { if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) @@ -920,7 +917,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_neigh_node *tmp_neigh_node; - struct hlist_node *node; int is_duplicate = 0; int32_t seq_diff; int need_update = 0; @@ -943,7 +939,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, goto out; rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 30f46526cbbd..6a4f728680ae 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -144,7 +144,6 @@ static struct batadv_bla_claim { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_claim *claim; struct batadv_bla_claim *claim_tmp = NULL; int index; @@ -156,7 +155,7 @@ static struct batadv_bla_claim head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (!batadv_compare_claim(&claim->hash_entry, data)) continue; @@ -185,7 +184,6 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_backbone_gw search_entry, *backbone_gw; struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL; int index; @@ -200,7 +198,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_backbone_gw(&backbone_gw->hash_entry, &search_entry)) continue; @@ -221,7 +219,7 @@ static void batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_hashtable *hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_bla_claim *claim; int i; @@ -236,13 +234,13 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(claim, node, node_tmp, + hlist_for_each_entry_safe(claim, node_tmp, head, hash_entry) { if (claim->backbone_gw != backbone_gw) continue; batadv_claim_free_ref(claim); - hlist_del_rcu(node); + hlist_del_rcu(&claim->hash_entry); } spin_unlock_bh(list_lock); } @@ -460,7 +458,6 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; struct batadv_bla_claim *claim; @@ -481,7 +478,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { /* only own claims are interesting */ if (claim->backbone_gw != backbone_gw) continue; @@ -958,7 +955,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) { struct batadv_bla_backbone_gw *backbone_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -973,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node, node_tmp, + hlist_for_each_entry_safe(backbone_gw, node_tmp, head, hash_entry) { if (now) goto purge_now; @@ -992,7 +989,7 @@ purge_now: batadv_bla_del_backbone_claims(backbone_gw); - hlist_del_rcu(node); + hlist_del_rcu(&backbone_gw->hash_entry); batadv_backbone_gw_free_ref(backbone_gw); } spin_unlock_bh(list_lock); @@ -1013,7 +1010,6 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, int now) { struct batadv_bla_claim *claim; - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; int i; @@ -1026,7 +1022,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (now) goto purge_now; if (!batadv_compare_eth(claim->backbone_gw->orig, @@ -1062,7 +1058,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *oldif) { struct batadv_bla_backbone_gw *backbone_gw; - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; __be16 group; @@ -1086,7 +1081,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { /* own orig still holds the old value. */ if (!batadv_compare_eth(backbone_gw->orig, oldif->net_dev->dev_addr)) @@ -1112,7 +1107,6 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_bla *priv_bla; - struct hlist_node *node; struct hlist_head *head; struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; @@ -1140,7 +1134,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_eth(backbone_gw->orig, primary_if->net_dev->dev_addr)) continue; @@ -1322,7 +1316,6 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_backbone_gw *backbone_gw; int i; @@ -1336,7 +1329,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (batadv_compare_eth(backbone_gw->orig, orig)) { rcu_read_unlock(); return 1; @@ -1607,7 +1600,6 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; bool is_own; @@ -1628,7 +1620,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", @@ -1652,7 +1644,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; int secs, msecs; uint32_t i; @@ -1674,7 +1665,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); secs = msecs / 1000; diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 761a59002e34..d54188a112ea 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -83,7 +83,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, { spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_dat_entry *dat_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -95,7 +95,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, list_lock = &bat_priv->dat.hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not @@ -103,7 +103,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, if (to_purge && !to_purge(dat_entry)) continue; - hlist_del_rcu(node); + hlist_del_rcu(&dat_entry->hash_entry); batadv_dat_entry_free_ref(dat_entry); } spin_unlock_bh(list_lock); @@ -235,7 +235,6 @@ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) { struct hlist_head *head; - struct hlist_node *node; struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; uint32_t index; @@ -247,7 +246,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { if (dat_entry->ip != ip) continue; @@ -465,7 +464,6 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, batadv_dat_addr_t max = 0, tmp_max = 0; struct batadv_orig_node *orig_node, *max_orig_node = NULL; struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; int i; @@ -481,7 +479,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* the dht space is a ring and addresses are unsigned */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -686,7 +684,6 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->dat.hash; struct batadv_dat_entry *dat_entry; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; unsigned long last_seen_jiffies; int last_seen_msecs, last_seen_secs, last_seen_mins; @@ -704,7 +701,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { last_seen_jiffies = jiffies - dat_entry->last_update; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_mins = last_seen_msecs / 60000; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 074107f2cfaa..34f99a46ec1d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -114,7 +114,6 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -127,7 +126,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) gw_divisor *= 64; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -344,7 +343,6 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, uint8_t new_gwflags) { - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw; /* Note: We don't need a NULL check here, since curr_gw never gets @@ -355,7 +353,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -403,7 +401,7 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); int do_deselect = 0; @@ -411,7 +409,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_lock_bh(&bat_priv->gw.list_lock); - hlist_for_each_entry_safe(gw_node, node, node_tmp, + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && @@ -476,7 +474,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; - struct hlist_node *node; int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); @@ -490,7 +487,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 21fe6987733b..0488d70c8c35 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -345,9 +345,8 @@ void batadv_recv_handler_unregister(uint8_t packet_type) static struct batadv_algo_ops *batadv_algo_get(char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - struct hlist_node *node; - hlist_for_each_entry(bat_algo_ops_tmp, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { if (strcmp(bat_algo_ops_tmp->name, name) != 0) continue; @@ -411,11 +410,10 @@ out: int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - struct hlist_node *node; seq_printf(seq, "Available routing algorithms:\n"); - hlist_for_each_entry(bat_algo_ops, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 457ea445217c..96fb80b724dc 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -118,7 +118,7 @@ out: static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node, *tmp_neigh_node; struct batadv_orig_node *orig_node; @@ -134,7 +134,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); @@ -161,7 +161,7 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -179,9 +179,9 @@ void batadv_originator_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); } spin_unlock_bh(list_lock); @@ -274,7 +274,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node **best_neigh_node) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; @@ -285,7 +285,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; @@ -348,7 +348,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, static void _batadv_purge_orig(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -363,13 +363,13 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) batadv_gw_node_delete(bat_priv, orig_node); - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); continue; } @@ -408,7 +408,6 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; @@ -434,7 +433,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { neigh_node = batadv_orig_node_get_router(orig_node); if (!neigh_node) continue; @@ -453,7 +452,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp, + hlist_for_each_entry_rcu(neigh_node_tmp, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node_tmp->addr, @@ -511,7 +510,6 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; uint32_t i; @@ -524,7 +522,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); @@ -595,7 +593,6 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; @@ -609,7 +606,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_del_if(orig_node, max_if_num, hard_iface->if_num); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 286bf743e76a..7df48fa7669d 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -68,7 +68,6 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; int index; @@ -79,7 +78,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (!batadv_compare_eth(orig_node, data)) continue; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 60ba03fc8390..5ee21cebbbb0 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -37,7 +37,6 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; unsigned long *word; @@ -49,7 +48,7 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -146,7 +145,6 @@ out: void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { - struct hlist_node *node; struct batadv_neigh_node *tmp_neigh_node, *router = NULL; uint8_t interference_candidate = 0; @@ -169,7 +167,7 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, * interface. If we do, we won't select this candidate because of * possible interference. */ - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { if (tmp_neigh_node == neigh_node) continue; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 80ca65fc89a1..a67cffde37ae 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -316,7 +316,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; - struct hlist_node *tmp_node, *safe_tmp_node; + struct hlist_node *safe_tmp_node; bool pending; if (hard_iface) @@ -329,7 +329,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free bcast list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bcast_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface @@ -355,7 +355,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free batman packet list */ spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bat_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d44672f4a349..98a66a021a60 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -56,7 +56,6 @@ static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_common_entry *tt_common_entry_tmp = NULL; uint32_t index; @@ -68,7 +67,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (!batadv_compare_eth(tt_common_entry, data)) continue; @@ -257,7 +256,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; int hash_added; bool roamed_back = false; @@ -339,7 +337,7 @@ check_roaming: /* These node are probably going to update their tt table */ head = &tt_global->orig_list; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, orig_entry->orig_node); } @@ -470,7 +468,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int last_seen_secs; @@ -494,7 +491,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -605,9 +602,9 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -651,7 +648,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -665,9 +662,9 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); @@ -724,11 +721,10 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, { struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; - struct hlist_node *node; rcu_read_lock(); head = &entry->orig_list; - hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { + hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) @@ -940,12 +936,11 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router = NULL; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; int best_tq = 0; head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { router = batadv_orig_node_get_router(orig_entry->orig_node); if (!router) continue; @@ -973,7 +968,6 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, struct seq_file *seq) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_common_entry *tt_common_entry; uint16_t flags; @@ -997,7 +991,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (best_entry == orig_entry) continue; @@ -1020,7 +1014,6 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; @@ -1039,7 +1032,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1059,13 +1052,13 @@ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { - hlist_del_rcu(node); + hlist_for_each_entry_safe(orig_entry, safe, head, list) { + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); @@ -1078,18 +1071,18 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, const char *message) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { + hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting %pM from global tt entry %pM: %s\n", orig_node->orig, tt_global_entry->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } } @@ -1108,7 +1101,6 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, { bool last_entry = true; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; /* no local entry exists, case 1: @@ -1117,7 +1109,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, rcu_read_lock(); head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (orig_entry->orig_node != orig_node) { last_entry = false; break; @@ -1202,7 +1194,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *tt_common_entry; uint32_t i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1214,7 +1206,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, safe, + hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1227,7 +1219,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM: %s\n", tt_global->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } } @@ -1262,7 +1254,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; char *msg = NULL; @@ -1274,7 +1266,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, @@ -1287,7 +1279,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) "Deleting global tt entry (%pM): %s\n", tt_global->common.addr, msg); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } @@ -1301,7 +1293,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1315,9 +1307,9 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); @@ -1397,7 +1389,6 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1406,7 +1397,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); @@ -1449,7 +1440,6 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1458,7 +1448,7 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { /* not yet committed clients have not to be taken into * account while computing the CRC */ @@ -1597,7 +1587,6 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_query_packet *tt_response; struct batadv_tt_change *tt_change; - struct hlist_node *node; struct hlist_head *head; struct sk_buff *skb = NULL; uint16_t tt_tot, tt_count; @@ -1627,7 +1616,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (tt_count == tt_tot) break; @@ -2307,7 +2296,6 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, uint32_t i; uint16_t changed_num = 0; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; if (!hash) @@ -2317,7 +2305,7 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (enable) { if ((tt_common_entry->flags & flags) == flags) @@ -2342,7 +2330,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -2355,7 +2343,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING)) continue; @@ -2365,7 +2353,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_common->addr); atomic_dec(&bat_priv->tt.local_entry_num); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 22d2785177d1..c053244b97bd 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -97,7 +97,6 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; uint32_t index; @@ -108,8 +107,8 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { - if (!batadv_vis_info_cmp(node, data)) + hlist_for_each_entry_rcu(vis_info, head, hash_entry) { + if (!batadv_vis_info_cmp(&vis_info->hash_entry, data)) continue; vis_info_tmp = vis_info; @@ -128,9 +127,8 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, bool primary) { struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (batadv_compare_eth(entry->addr, interface)) return; } @@ -148,9 +146,8 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq, const struct hlist_head *if_list) { struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (entry->primary) seq_printf(seq, "PRIMARY, "); else @@ -198,9 +195,8 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, { int i; struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, list, list) { + hlist_for_each_entry(entry, list, list) { seq_printf(seq, "%pM,", entry->addr); for (i = 0; i < packet->entries; i++) @@ -218,17 +214,16 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, const struct hlist_head *head) { - struct hlist_node *node; struct batadv_vis_info *info; struct batadv_vis_packet *packet; uint8_t *entries_pos; struct batadv_vis_info_entry *entries; struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos, *n; + struct hlist_node *n; HLIST_HEAD(vis_if_list); - hlist_for_each_entry_rcu(info, node, head, hash_entry) { + hlist_for_each_entry_rcu(info, head, hash_entry) { packet = (struct batadv_vis_packet *)info->skb_packet->data; entries_pos = (uint8_t *)packet + sizeof(*packet); entries = (struct batadv_vis_info_entry *)entries_pos; @@ -240,7 +235,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, batadv_vis_data_read_entries(seq, &vis_if_list, packet, entries); - hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, list) { + hlist_for_each_entry_safe(entry, n, &vis_if_list, list) { hlist_del(&entry->list); kfree(entry); } @@ -519,7 +514,6 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->orig_hash; struct batadv_neigh_node *router; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -532,7 +526,7 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -571,7 +565,6 @@ static bool batadv_vis_packet_full(const struct batadv_vis_info *info) static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; @@ -605,7 +598,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -644,7 +637,7 @@ next: head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { packet_pos = skb_put(info->skb_packet, sizeof(*entry)); entry = (struct batadv_vis_info_entry *)packet_pos; @@ -673,14 +666,14 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(info, node, node_tmp, + hlist_for_each_entry_safe(info, node_tmp, head, hash_entry) { /* never purge own data. */ if (info == bat_priv->vis.my_info) @@ -688,7 +681,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) if (batadv_has_timed_out(info->first_seen, BATADV_VIS_TIMEOUT)) { - hlist_del(node); + hlist_del(&info->hash_entry); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } @@ -700,7 +693,6 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_vis_info *info) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -715,7 +707,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* if it's a vis server and reachable, send it. */ if (!(orig_node->flags & BATADV_VIS_SERVER)) continue; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 07f073935811..6a93614f2c49 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -70,14 +70,13 @@ static struct bt_sock_list hci_sk_list = { void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct hci_filter *flt; struct sk_buff *nskb; @@ -142,13 +141,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Skip the original socket */ @@ -176,7 +174,6 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; __le16 opcode; @@ -210,7 +207,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -251,13 +248,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) static void send_monitor_event(struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -393,11 +389,10 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - struct hlist_node *node; /* Detach sockets from device */ read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ce3f6658f4b2..c23bae86263b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -107,15 +107,14 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; - struct hlist_node *node; - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel == channel && !bacmp(&bt_sk(sk)->src, src)) break; } - return node ? sk : NULL; + return sk ? sk : NULL; } /* Find socket with channel and source bdaddr. @@ -124,11 +123,10 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -145,7 +143,7 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t * read_unlock(&rfcomm_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) @@ -970,11 +968,10 @@ done: static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b5178d62064e..79d87d8d4f51 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -259,10 +259,9 @@ drop: /* -------- Socket interface ---------- */ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) { - struct hlist_node *node; struct sock *sk; - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -279,11 +278,10 @@ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) static struct sock *sco_get_sock_listen(bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -298,7 +296,7 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) read_unlock(&sco_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void sco_sock_destruct(struct sock *sk) @@ -951,14 +949,13 @@ static void sco_conn_ready(struct sco_conn *conn) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { struct sock *sk; - struct hlist_node *node; int lm = 0; BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr); /* Find listening sockets */ read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -1018,11 +1015,10 @@ drop: static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { seq_printf(f, "%pMR %pMR %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 8117900af4de..b0812c91c0f0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -181,9 +181,9 @@ void br_fdb_cleanup(unsigned long _data) spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; if (f->is_static) continue; @@ -207,8 +207,8 @@ void br_fdb_flush(struct net_bridge *br) spin_lock_bh(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + struct hlist_node *n; + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { if (!f->is_static) fdb_delete(br, f); } @@ -266,10 +266,9 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, + hlist_for_each_entry_rcu(fdb, &br->hash[br_mac_hash(addr, vid)], hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) { @@ -315,14 +314,13 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, { struct __fdb_entry *fe = buf; int i, num = 0; - struct hlist_node *h; struct net_bridge_fdb_entry *f; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (num >= maxnum) goto out; @@ -363,10 +361,9 @@ static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry(fdb, h, head, hlist) { + hlist_for_each_entry(fdb, head, hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) return fdb; @@ -378,10 +375,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, head, hlist) { + hlist_for_each_entry_rcu(fdb, head, hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) return fdb; @@ -593,10 +589,9 @@ int br_fdb_dump(struct sk_buff *skb, goto out; for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h; struct net_bridge_fdb_entry *f; - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (idx < cb->args[0]) goto skip; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 38991e03646d..9f97b850fc65 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -18,7 +18,6 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; - struct hlist_node *n; struct nlattr *nest; if (!br->multicast_router || hlist_empty(&br->router_list)) @@ -28,7 +27,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (nest == NULL) return -EMSGSIZE; - hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) goto fail; } @@ -61,12 +60,11 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, return -EMSGSIZE; for (i = 0; i < mdb->max; i++) { - struct hlist_node *h; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p, **pp; struct net_bridge_port *port; - hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { if (idx < s_idx) goto skip; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7d886b0a8b7b..10e6fce1bb62 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -86,9 +86,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; - hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) { if (br_ip_equal(&mp->addr, dst)) return mp; } @@ -178,13 +177,12 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, int elasticity) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; int maxlen; int len; int i; for (i = 0; i < old->max; i++) - hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], &new->mhash[br_ip_hash(new, &mp->addr)]); @@ -194,7 +192,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, maxlen = 0; for (i = 0; i < new->max; i++) { len = 0; - hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver]) len++; if (len > maxlen) maxlen = len; @@ -510,14 +508,13 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p; unsigned int count = 0; unsigned int max; int elasticity; int err; mdb = rcu_dereference_protected(br->mdb, 1); - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) return mp; @@ -882,10 +879,10 @@ void br_multicast_disable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - struct hlist_node *p, *n; + struct hlist_node *n; spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) @@ -1025,12 +1022,12 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; - struct hlist_node *n, *slot = NULL; + struct hlist_node *slot = NULL; - hlist_for_each_entry(p, n, &br->router_list, rlist) { + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; - slot = n; + slot = &p->rlist; } if (slot) @@ -1653,7 +1650,7 @@ void br_multicast_stop(struct net_bridge *br) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p, *n; + struct hlist_node *n; u32 ver; int i; @@ -1670,7 +1667,7 @@ void br_multicast_stop(struct net_bridge *br) ver = mdb->ver; for (i = 0; i < mdb->max; i++) { - hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); diff --git a/net/can/af_can.c b/net/can/af_can.c index ddac1ee2ed20..c48e5220bbac 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -516,7 +516,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, { struct receiver *r = NULL; struct hlist_head *rl; - struct hlist_node *next; struct dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) @@ -540,7 +539,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * been registered before. */ - hlist_for_each_entry_rcu(r, next, rl, list) { + hlist_for_each_entry_rcu(r, rl, list) { if (r->can_id == can_id && r->mask == mask && r->func == func && r->data == data) break; @@ -552,7 +551,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * will be NULL, while r will point to the last item of the list. */ - if (!next) { + if (!r) { printk(KERN_ERR "BUG: receive list entry not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); @@ -590,7 +589,6 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r) static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) { struct receiver *r; - struct hlist_node *n; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; @@ -600,7 +598,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { if (can_id & r->mask) { deliver(skb, r); matches++; @@ -610,13 +608,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { deliver(skb, r); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { if ((can_id & r->mask) == r->can_id) { deliver(skb, r); matches++; @@ -624,7 +622,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { if ((can_id & r->mask) != r->can_id) { deliver(skb, r); matches++; @@ -636,7 +634,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; @@ -644,7 +642,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) { + hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { deliver(skb, r); matches++; } diff --git a/net/can/gw.c b/net/can/gw.c index c185fcd5e828..2d117dc5ebea 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -457,11 +457,11 @@ static int cgw_notifier(struct notifier_block *nb, if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); @@ -575,12 +575,11 @@ cancel: static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct cgw_job *gwj = NULL; - struct hlist_node *n; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); - hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + hlist_for_each_entry_rcu(gwj, &cgw_list, list) { if (idx < s_idx) goto cont; @@ -858,11 +857,11 @@ out: static void cgw_remove_all_jobs(void) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); kfree(gwj); @@ -872,7 +871,7 @@ static void cgw_remove_all_jobs(void) static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; @@ -907,7 +906,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ASSERT_RTNL(); /* remove only the first matching entry */ - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->flags != r->flags) continue; diff --git a/net/can/proc.c b/net/can/proc.c index 497335892146..1ab8c888f102 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -195,9 +195,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct net_device *dev) { struct receiver *r; - struct hlist_node *n; - hlist_for_each_entry_rcu(r, n, rx_list, list) { + hlist_for_each_entry_rcu(r, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? " %-5s %08x %08x %pK %pK %8ld %s\n" : " %-5s %03x %08x %pK %pK %8ld %s\n"; diff --git a/net/core/dev.c b/net/core/dev.c index 18d8b5acc343..a06a7a58dd11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -658,11 +658,10 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry(dev, p, head, name_hlist) + hlist_for_each_entry(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -684,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry_rcu(dev, p, head, name_hlist) + hlist_for_each_entry_rcu(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -735,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name); struct net_device *__dev_get_by_index(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry(dev, p, head, index_hlist) + hlist_for_each_entry(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; @@ -760,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry_rcu(dev, p, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; diff --git a/net/core/flow.c b/net/core/flow.c index 43f7495df27a..c56ea6f7f6c7 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { @@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; size_t keysize; unsigned int hash; @@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_new_hash_rnd(fc, fcp); hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && @@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 0f6bb6f8d391..3174f1998ee6 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -16,12 +16,11 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff { struct net *net = seq_file_net(seq); struct net_device *dev; - struct hlist_node *p; struct hlist_head *h; unsigned int count = 0, offset = get_offset(*pos); h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + hlist_for_each_entry_rcu(dev, h, name_hlist) { if (++count == offset) return dev; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8aa20f6a46e..b376410ff259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1060,7 +1060,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx; struct net_device *dev; struct hlist_head *head; - struct hlist_node *node; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; @@ -1080,7 +1079,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c4a2def5b7bd..c21f200eed93 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -175,12 +175,11 @@ static struct hlist_head *dn_find_list(struct sock *sk) static int check_port(__le16 port) { struct sock *sk; - struct hlist_node *node; if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -374,11 +373,10 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) { struct hlist_head *list = listen_hash(addr); - struct hlist_node *node; struct sock *sk; read_lock(&dn_hash_lock); - sk_for_each(sk, node, list) { + sk_for_each(sk, list) { struct dn_scp *scp = DN_SK(sk); if (sk->sk_state != TCP_LISTEN) continue; @@ -414,11 +412,10 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk; - struct hlist_node *node; struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f968c1b58f47..6c2445bcaba1 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -483,7 +483,6 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; - struct hlist_node *node; int dumped = 0; if (!net_eq(net, &init_net)) @@ -498,7 +497,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { e = 0; - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { if (e < s_e) goto next; if (dumped) @@ -828,7 +827,6 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; - struct hlist_node *node; unsigned int h; if (n < RT_TABLE_MIN) @@ -839,7 +837,7 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) h = n & (DN_FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { if (t->n == n) { rcu_read_unlock(); return t; @@ -885,11 +883,10 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - struct hlist_node *node; unsigned int h; for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) flushed += tb->flush(tb); } @@ -908,12 +905,12 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { struct dn_fib_table *t; - struct hlist_node *node, *next; + struct hlist_node *next; unsigned int h; write_lock(&dn_fib_tables_lock); for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], hlist) { hlist_del(&t->hlist); kfree(t); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 16705611589a..e0da175f8e5b 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -350,7 +350,6 @@ static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; - struct hlist_node *node; int ret = NET_RX_SUCCESS; u16 pan_id, short_addr; @@ -361,7 +360,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); read_lock(&dgram_lock); - sk_for_each(sk, node, &dgram_head) { + sk_for_each(sk, &dgram_head) { if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, dgram_sk(sk))) { if (prev) { diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 50e823927d49..41f538b8e59c 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -221,10 +221,9 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; read_lock(&raw_lock); - sk_for_each(sk, node, &raw_head) { + sk_for_each(sk, &raw_head) { bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5281314886c1..f678507bc829 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -139,10 +139,9 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; - struct hlist_node *node; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { if (ifa->ifa_local == addr) { struct net_device *dev = ifa->ifa_dev->dev; @@ -588,7 +587,6 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; - struct hlist_node *node; int i; now = jiffies; @@ -596,8 +594,7 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { - hlist_for_each_entry_rcu(ifa, node, - &inet_addr_lst[i], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) @@ -1493,7 +1490,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_device *in_dev; struct in_ifaddr *ifa; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1503,7 +1499,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 99f00d39d10b..eb4bb12b3eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -112,7 +112,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; @@ -122,7 +121,7 @@ struct fib_table *fib_get_table(struct net *net, u32 id) rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -137,13 +136,12 @@ static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) + hlist_for_each_entry(tb, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -656,7 +654,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; int dumped = 0; @@ -670,7 +667,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) { + hlist_for_each_entry(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -1117,11 +1114,11 @@ static void ip_fib_net_exit(struct net *net) for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; head = &net->ipv4.fib_table_hash[i]; - hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { - hlist_del(node); + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); fib_table_flush(tb); fib_free_table(tb); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4797a800faf8..8f6cb7a87cd6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -298,14 +298,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) static struct fib_info *fib_find_info(const struct fib_info *nfi) { struct hlist_head *head; - struct hlist_node *node; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn(nfi); head = &fib_info_hash[hash]; - hlist_for_each_entry(fi, node, head, fib_hash) { + hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) @@ -331,7 +330,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; unsigned int hash; @@ -339,7 +337,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags & RTNH_F_DEAD)) { @@ -721,10 +719,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { + hlist_for_each_entry_safe(fi, n, head, fib_hash) { struct hlist_head *dest; unsigned int new_hash; @@ -739,10 +737,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *lhead = &fib_info_laddrhash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; unsigned int new_hash; @@ -1096,13 +1094,12 @@ int fib_sync_down_addr(struct net *net, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; struct fib_info *fi; if (fib_info_laddrhash == NULL || local == 0) return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { + hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { @@ -1120,13 +1117,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; struct fib_nh *nh; if (force) scope = -1; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int dead; @@ -1232,7 +1228,6 @@ int fib_sync_up(struct net_device *dev) struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; int ret; @@ -1244,7 +1239,7 @@ int fib_sync_up(struct net_device *dev) head = &fib_info_devhash[hash]; ret = 0; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int alive; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 61e03da3e1f5..ff06b7543d9f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -920,10 +920,9 @@ nomem: static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { struct hlist_head *head = &l->list; - struct hlist_node *node; struct leaf_info *li; - hlist_for_each_entry_rcu(li, node, head, hlist) + hlist_for_each_entry_rcu(li, head, hlist) if (li->plen == plen) return li; @@ -943,12 +942,11 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node; if (hlist_empty(head)) { hlist_add_head_rcu(&new->hlist, head); } else { - hlist_for_each_entry(li, node, head, hlist) { + hlist_for_each_entry(li, head, hlist) { if (new->plen > li->plen) break; @@ -1354,9 +1352,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, { struct leaf_info *li; struct hlist_head *hhead = &l->list; - struct hlist_node *node; - hlist_for_each_entry_rcu(li, node, hhead, hlist) { + hlist_for_each_entry_rcu(li, hhead, hlist) { struct fib_alias *fa; if (l->key != (key & li->mask_plen)) @@ -1740,10 +1737,10 @@ static int trie_flush_leaf(struct leaf *l) { int found = 0; struct hlist_head *lih = &l->list; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct leaf_info *li = NULL; - hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { + hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); if (list_empty(&li->falh)) { @@ -1895,14 +1892,13 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; - struct hlist_node *node; int i, s_i; s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { if (i < s_i) { i++; continue; @@ -2092,14 +2088,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (IS_LEAF(n)) { struct leaf *l = (struct leaf *)n; struct leaf_info *li; - struct hlist_node *tmp; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) + hlist_for_each_entry_rcu(li, &l->list, hlist) ++s->prefixes; } else { const struct tnode *tn = (const struct tnode *) n; @@ -2200,10 +2195,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; @@ -2245,10 +2239,9 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct rt_trie_node *n; for (n = fib_trie_get_first(iter, @@ -2298,7 +2291,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* new hash chain */ while (++h < FIB_TABLE_HASHSZ) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; @@ -2381,13 +2374,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; struct leaf_info *li; - struct hlist_node *node; __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2532,7 +2524,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) { struct leaf *l = v; struct leaf_info *li; - struct hlist_node *node; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2541,7 +2532,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; __be32 mask, prefix; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 11cb4979a465..7d1874be1df3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -57,7 +57,6 @@ int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; - struct hlist_node *node; int reuse = sk->sk_reuse; int reuseport = sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); @@ -69,7 +68,7 @@ int inet_csk_bind_conflict(const struct sock *sk, * one this bucket belongs to. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || @@ -95,7 +94,7 @@ int inet_csk_bind_conflict(const struct sock *sk, } } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); @@ -106,7 +105,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_bind_hashbucket *head; - struct hlist_node *node; struct inet_bind_bucket *tb; int ret, attempts = 5; struct net *net = sock_net(sk); @@ -129,7 +127,7 @@ again: head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { if (((tb->fastreuse > 0 && sk->sk_reuse && @@ -183,7 +181,7 @@ have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 2e453bde6992..245ae078a07f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -33,9 +33,9 @@ static void inet_frag_secret_rebuild(unsigned long dummy) get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_queue *q; - struct hlist_node *p, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + hlist_for_each_entry_safe(q, n, &f->hash[i], list) { unsigned int hval = f->hashfn(q); if (hval != i) { @@ -203,7 +203,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_queue *qp; #ifdef CONFIG_SMP - struct hlist_node *n; #endif unsigned int hash; @@ -219,7 +218,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, * such entry could be created on other cpu, while we * promoted read lock to write lock. */ - hlist_for_each_entry(qp, n, &f->hash[hash], list) { + hlist_for_each_entry(qp, &f->hash[hash], list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); @@ -278,9 +277,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; - struct hlist_node *n; - hlist_for_each_entry(q, n, &f->hash[hash], list) { + hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); read_unlock(&f->lock); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 0ce0595d9861..6af375afeeef 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -120,13 +120,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ - struct hlist_node *node; - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && tb->port == port) break; } - if (!node) { + if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, sock_net(sk), head, port); if (!tb) { @@ -493,7 +492,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; - struct hlist_node *node; struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); @@ -512,7 +510,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * because the established check is already * unique enough. */ - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { if (tb->fastreuse >= 0 || diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2784db3155fb..1f27c9f4afd0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -216,7 +216,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, const int slot) { struct inet_timewait_sock *tw; - struct hlist_node *node; unsigned int killed; int ret; @@ -229,7 +228,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); @@ -438,10 +437,10 @@ void inet_twdr_twcal_tick(unsigned long data) for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; + struct hlist_node *safe; struct inet_timewait_sock *tw; - inet_twsk_for_each_inmate_safe(tw, node, safe, + inet_twsk_for_each_inmate_safe(tw, safe, &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 53ddebc292b6..dd44e0ab600c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -111,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk); static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { - struct hlist_node *node; - - sk_for_each_from(sk, node) { + sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && @@ -914,9 +912,7 @@ static struct sock *raw_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; - - sk_for_each(sk, node, &state->h->ht[state->bucket]) + sk_for_each(sk, &state->h->ht[state->bucket]) if (sock_net(sk) == seq_file_net(seq)) goto found; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 145d3bf8df86..4a8ec457310f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -954,7 +954,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); struct tcp_md5sig_info *md5sig; @@ -968,7 +967,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -1069,14 +1068,14 @@ static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos, *n; + struct hlist_node *n; struct tcp_md5sig_info *md5sig; md5sig = rcu_dereference_protected(tp->md5sig_info, 1); if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { + hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4dc0d44a5d31..f2c7e615f902 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1419,11 +1419,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; - struct hlist_node *node; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1445,9 +1444,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; - struct hlist_node *node; - hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1487,10 +1485,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add { struct inet6_ifaddr *ifp, *result = NULL; unsigned int hash = inet6_addr_hash(addr); - struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -2907,11 +2904,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; - struct hlist_node *n; spin_lock_bh(&addrconf_hash_lock); restart: - hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_timer(ifa); @@ -3218,8 +3214,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - struct hlist_node *n; - hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3244,9 +3239,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -3255,7 +3249,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3357,11 +3351,10 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr *ifp = NULL; - struct hlist_node *n; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3383,7 +3376,6 @@ static void addrconf_verify(unsigned long foo) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - struct hlist_node *node; int i; rcu_read_lock_bh(); @@ -3395,7 +3387,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; @@ -3866,7 +3858,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -3876,7 +3867,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -4222,7 +4213,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -4231,7 +4221,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; idev = __in6_dev_get(dev); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ff76eecfd622..aad64352cb60 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { - struct hlist_node *pos; struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } @@ -261,9 +260,9 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (hlist_empty(&ip6addrlbl_table.head)) { hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); } else { - struct hlist_node *pos, *n; + struct hlist_node *n; struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, pos, n, + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && @@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net, int ifindex) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; int ret = -ESRCH; ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && @@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net) static void __net_exit ip6addrlbl_net_exit(struct net *net) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; /* Remove all labels belonging to the exiting net */ spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (net_eq(ip6addrlbl_net(p), net)) { hlist_del_rcu(&p->list); ip6addrlbl_put(p); @@ -505,12 +504,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; - struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b386a2ce4c6f..9bfab19ff3c0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -31,7 +31,6 @@ int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { const struct sock *sk2; - const struct hlist_node *node; int reuse = sk->sk_reuse; int reuseport = sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); @@ -41,7 +40,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, * See comment in inet_csk_bind_conflict about sock lookup * vs net namespaces issues. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || @@ -58,7 +57,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 710cafd2e1a9..192dd1a0e188 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; - struct hlist_node *node; unsigned int h; if (id == 0) @@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; - struct hlist_node *node; struct hlist_head *head; int res = 0; @@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -1520,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { read_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); @@ -1540,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c65907db8c44..330b5e7b7df6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; bool is_multicast = ipv6_addr_is_multicast(loc_addr); - sk_for_each_from(sk, node) + sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6cc48012b730..de2bcfaaf759 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -89,9 +89,8 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - hlist_for_each_entry_rcu(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (xfrm6_addr_equal(&x6spi->addr, saddr)) @@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); - struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry(x6spi, &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) @@ -203,11 +201,11 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos, *n; + struct hlist_node *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); - hlist_for_each_entry_safe(x6spi, pos, n, + hlist_for_each_entry_safe(x6spi, n, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index dfd6faaf0ea7..f547a47d381c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -228,9 +228,8 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &intrfc->if_sklist) + sk_for_each(s, &intrfc->if_sklist) if (ipx_sk(s)->port == port) goto found; s = NULL; @@ -259,12 +258,11 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == port && @@ -282,14 +280,14 @@ found: static void __ipxitf_down(struct ipx_interface *intrfc) { struct sock *s; - struct hlist_node *node, *t; + struct hlist_node *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ - sk_for_each_safe(s, node, t, &intrfc->if_sklist) { + sk_for_each_safe(s, t, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); s->sk_err = ENOLINK; @@ -385,12 +383,11 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc, int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN); struct sock *s; - struct hlist_node *node; int rc; spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && @@ -446,12 +443,11 @@ static struct sock *ncp_connection_hack(struct ipx_interface *intrfc, connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8); if (connection) { - struct hlist_node *node; /* Now we have to look for a special NCP connection handling * socket. Only these sockets have ipx_ncp_conn != 0, set by * SIOCIPXNCPCONN. */ spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(sk, node, &intrfc->if_sklist) + sk_for_each(sk, &intrfc->if_sklist) if (ipx_sk(sk)->ipx_ncp_conn == connection) { sock_hold(sk); goto found; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 02ff7f2f60d4..65e8833a2510 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -103,19 +103,18 @@ out: static __inline__ struct sock *ipx_get_socket_idx(loff_t pos) { struct sock *s = NULL; - struct hlist_node *node; struct ipx_interface *i; list_for_each_entry(i, &ipx_interfaces, node) { spin_lock_bh(&i->if_sklist_lock); - sk_for_each(s, node, &i->if_sklist) { + sk_for_each(s, &i->if_sklist) { if (!pos) break; --pos; } spin_unlock_bh(&i->if_sklist_lock); if (!pos) { - if (node) + if (s) goto found; break; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cd6f7a991d80..a7d11ffe4284 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -156,14 +156,13 @@ static int afiucv_pm_freeze(struct device *dev) { struct iucv_sock *iucv; struct sock *sk; - struct hlist_node *node; int err = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_freeze\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); switch (sk->sk_state) { case IUCV_DISCONN: @@ -194,13 +193,12 @@ static int afiucv_pm_freeze(struct device *dev) static int afiucv_pm_restore_thaw(struct device *dev) { struct sock *sk; - struct hlist_node *node; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { switch (sk->sk_state) { case IUCV_CONNECTED: sk->sk_err = EPIPE; @@ -390,9 +388,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, static struct sock *__iucv_get_sock_by_name(char *nm) { struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) return sk; @@ -1678,7 +1675,6 @@ static int iucv_callback_connreq(struct iucv_path *path, unsigned char user_data[16]; unsigned char nuser_data[16]; unsigned char src_name[8]; - struct hlist_node *node; struct sock *sk, *nsk; struct iucv_sock *iucv, *niucv; int err; @@ -1689,7 +1685,7 @@ static int iucv_callback_connreq(struct iucv_path *path, read_lock(&iucv_sk_list.lock); iucv = NULL; sk = NULL; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { /* @@ -2115,7 +2111,6 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; struct af_iucv_trans_hdr *trans_hdr; @@ -2132,7 +2127,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, iucv = NULL; sk = NULL; read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { if ((!memcmp(&iucv_sk(sk)->src_name, trans_hdr->destAppName, 8)) && @@ -2225,10 +2220,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, struct sk_buff *list_skb; struct sk_buff *nskb; unsigned long flags; - struct hlist_node *node; read_lock_irqsave(&iucv_sk_list.lock, flags); - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk == isk) { iucv = iucv_sk(sk); break; @@ -2299,14 +2293,13 @@ static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = (struct net_device *)ptr; - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; switch (event) { case NETDEV_REBOOT: case NETDEV_GOING_DOWN: - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); if ((iucv->hs_dev == event_dev) && (sk->sk_state == IUCV_CONNECTED)) { diff --git a/net/key/af_key.c b/net/key/af_key.c index 9ef79851f297..556fdafdd1ea 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -225,7 +225,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; @@ -236,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; rcu_read_lock(); - sk_for_each_rcu(sk, node, &net_pfkey->table) { + sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dcfd64e83ab7..d36875f3427e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -221,10 +221,9 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) struct hlist_head *session_list = l2tp_session_id_hash_2(pn, session_id); struct l2tp_session *session; - struct hlist_node *walk; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) { + hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { rcu_read_unlock_bh(); return session; @@ -253,7 +252,6 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn { struct hlist_head *session_list; struct l2tp_session *session; - struct hlist_node *walk; /* In L2TPv3, session_ids are unique over all tunnels and we * sometimes need to look them up before we know the @@ -264,7 +262,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, walk, session_list, hlist) { + hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -279,13 +277,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_find); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; - struct hlist_node *walk; struct l2tp_session *session; int count = 0; read_lock_bh(&tunnel->hlist_lock); for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) { + hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -306,12 +303,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; - struct hlist_node *walk; struct l2tp_session *session; rcu_read_lock_bh(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { rcu_read_unlock_bh(); return session; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index f7ac8f42fee2..7f41b7051269 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -49,10 +49,9 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip_bind_table) { + sk_for_each_bound(sk, &l2tp_ip_bind_table) { struct inet_sock *inet = inet_sk(sk); struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8ee4a86ae996..41f2f8126ebc 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -60,10 +60,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip6_bind_table) { + sk_for_each_bound(sk, &l2tp_ip6_bind_table) { struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 7c5073badc73..78be45cda5c1 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -393,12 +393,11 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_node *node; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6b3c4e119c63..dc7c8df40c2c 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -72,9 +72,9 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void) * it's used twice. So it is illegal to do * for_each_mesh_entry(rcu_dereference(...), ...) */ -#define for_each_mesh_entry(tbl, p, node, i) \ +#define for_each_mesh_entry(tbl, node, i) \ for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, p, &tbl->hash_buckets[i], list) + hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) static struct mesh_table *mesh_table_alloc(int size_order) @@ -139,7 +139,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } if (free_leafs) { spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, p, q, + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { hlist_del(&gate->list); kfree(gate); @@ -333,12 +333,11 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_node *n; struct hlist_head *bucket; struct mpath_node *node; bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { + hlist_for_each_entry_rcu(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) { @@ -389,11 +388,10 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct mesh_table *tbl = rcu_dereference(mesh_paths); struct mpath_node *node; - struct hlist_node *p; int i; int j = 0; - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { @@ -417,13 +415,12 @@ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; struct mpath_node *gate, *new_gate; - struct hlist_node *n; int err; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + hlist_for_each_entry_rcu(gate, tbl->known_gates, list) if (gate->mpath == mpath) { err = -EEXIST; goto err_rcu; @@ -460,9 +457,9 @@ err_rcu: static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { struct mpath_node *gate; - struct hlist_node *p, *q; + struct hlist_node *q; - hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) { + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { if (gate->mpath != mpath) continue; spin_lock_bh(&tbl->gates_lock); @@ -504,7 +501,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -550,7 +546,7 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -640,7 +636,6 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -680,7 +675,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -725,14 +720,13 @@ void mesh_plink_broken(struct sta_info *sta) static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && @@ -792,13 +786,12 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); read_lock_bh(&pathtbl_resize_lock); tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta) { spin_lock(&tbl->hashwlock[i]); @@ -815,11 +808,10 @@ static void table_flush_by_iface(struct mesh_table *tbl, { struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (mpath->sdata != sdata) continue; @@ -865,7 +857,6 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) struct mesh_path *mpath; struct mpath_node *node; struct hlist_head *bucket; - struct hlist_node *n; int hash_idx; int err = 0; @@ -875,7 +866,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) bucket = &tbl->hash_buckets[hash_idx]; spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(addr, mpath->dst)) { @@ -920,7 +911,6 @@ void mesh_path_tx_pending(struct mesh_path *mpath) int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; - struct hlist_node *n; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; struct mpath_node *gate = NULL; @@ -935,7 +925,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) if (!known_gates) return -EHOSTUNREACH; - hlist_for_each_entry_rcu(gate, n, known_gates, list) { + hlist_for_each_entry_rcu(gate, known_gates, list) { if (gate->mpath->sdata != sdata) continue; @@ -951,7 +941,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) } } - hlist_for_each_entry_rcu(gate, n, known_gates, list) + hlist_for_each_entry_rcu(gate, known_gates, list) if (gate->mpath->sdata == sdata) { mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); @@ -1096,12 +1086,11 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (node->mpath->sdata != sdata) continue; mpath = node->mpath; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9f00db7e03f2..704e514e02ab 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -259,13 +259,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -344,13 +343,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { @@ -394,7 +392,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; - struct hlist_node *n; /* * Check for "full" addressed entries @@ -403,7 +400,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && @@ -953,11 +950,10 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *n; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; @@ -981,7 +977,6 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -990,15 +985,15 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if ((e = cp->c_list.next)) - return hlist_entry(e, struct ip_vs_conn, c_list); + if (cp->c_list.next) + return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } @@ -1200,14 +1195,13 @@ void ip_vs_random_dropentry(struct net *net) */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - struct hlist_node *n; /* * Lock is actually needed in this loop. */ ct_write_lock_bh(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1255,14 +1249,12 @@ static void ip_vs_conn_flush(struct net *net) flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - struct hlist_node *n; - /* * Lock is actually needed in this loop. */ ct_write_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3921e5bc1235..8c10e3db3d9b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -90,14 +90,13 @@ __nf_ct_expect_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) return i; @@ -130,14 +129,13 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) { @@ -172,13 +170,13 @@ void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; /* Optimization: most connection never expect any others. */ if (!help) return; - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -348,9 +346,8 @@ static void evict_oldest_expect(struct nf_conn *master, { struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_expect *exp, *last = NULL; - struct hlist_node *n; - hlist_for_each_entry(exp, n, &master_help->expectations, lnode) { + hlist_for_each_entry(exp, &master_help->expectations, lnode) { if (exp->class == new->class) last = exp; } @@ -369,7 +366,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int h; int ret = 1; @@ -378,7 +375,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 013cdf69fe29..a9740bd6fe54 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -116,14 +116,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; - struct hlist_node *n; unsigned int h; if (!nf_ct_helper_count) return NULL; h = helper_hash(tuple); - hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) return helper; } @@ -134,11 +133,10 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; - struct hlist_node *n; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (!strcmp(h->name, name) && h->tuple.src.l3num == l3num && h->tuple.dst.protonum == protonum) @@ -357,7 +355,6 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret = 0; struct nf_conntrack_helper *cur; - struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -365,7 +362,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); - hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && cur->tuple.src.l3num == me->tuple.src.l3num && cur->tuple.dst.protonum == me->tuple.dst.protonum) { @@ -386,13 +383,13 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; - const struct hlist_node *n, *next; + const struct hlist_node *next; const struct hlist_nulls_node *nn; unsigned int i; /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5d60e04f9679..9904b15f600e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2370,14 +2370,13 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2510,7 +2509,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *n, *next; + struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; u16 zone; @@ -2557,7 +2556,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); @@ -2575,7 +2574,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* This basically means we have to flush everything*/ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 069229d919b6..0e7d423324c3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -855,11 +855,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; int found = 0; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || exp->tuple.dst.protonum != proto || @@ -881,10 +881,10 @@ static void flush_expectations(struct nf_conn *ct, bool media) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; if (!del_timer(&exp->timeout)) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5f2f9109f461..8d5769c6d16e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -191,9 +191,8 @@ find_appropriate_src(struct net *net, u16 zone, unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; - const struct hlist_node *n; - hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 945950a8b1f1..a191b6db657e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -282,7 +282,6 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - struct hlist_node *n; int ret = 0, i; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) @@ -296,7 +295,7 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, rcu_read_lock(); for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -452,13 +451,12 @@ static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; - struct hlist_node *n; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(cur, n, + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. */ @@ -495,7 +493,6 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; - struct hlist_node *n; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; @@ -520,7 +517,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -568,7 +565,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; int i, j = 0, ret; @@ -585,7 +582,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -654,13 +651,13 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; int i; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 92fd8eca0d31..f248db572972 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -87,11 +87,10 @@ static struct nfulnl_instance * __instance_lookup(u_int16_t group_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -717,11 +716,11 @@ nfulnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfulnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && (n->portid == inst->peer_portid)) __instance_destroy(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3158d87b56a8..858fd52c1040 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -80,11 +80,10 @@ static struct nfqnl_instance * instance_lookup(u_int16_t queue_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfqnl_instance *inst; head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -583,11 +582,10 @@ nfqnl_dev_drop(int ifindex) rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_rcu(inst, tmp, head, hlist) + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); } @@ -627,11 +625,11 @@ nfqnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((n->net == &init_net) && (n->portid == inst->peer_portid)) __instance_destroy(inst); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index f264032b8c56..370adf622cef 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -43,12 +43,11 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) struct xt_rateest *xt_rateest_lookup(const char *name) { struct xt_rateest *est; - struct hlist_node *n; unsigned int h; h = xt_rateest_hash(name); mutex_lock(&xt_rateest_mutex); - hlist_for_each_entry(est, n, &rateest_hash[h], list) { + hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; mutex_unlock(&xt_rateest_mutex); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70b5591a2586..c40b2695633b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -101,7 +101,7 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct nf_conn *found_ct; struct hlist_head *hash; bool addit = true; @@ -115,7 +115,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, pos, n, hash, node) { + hlist_for_each_entry_safe(conn, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -258,14 +258,14 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_for_each_entry_safe(conn, n, &hash[i], node) { hlist_del(&conn->node); kfree(conn); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 98218c896d2e..f330e8beaf69 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -141,11 +141,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { struct dsthash_ent *ent; - struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + hlist_for_each_entry_rcu(ent, &ht->hash[hash], node) if (dst_cmp(ent, dst)) { spin_lock(&ent->lock); return ent; @@ -297,8 +296,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { + struct hlist_node *n; + hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } @@ -343,9 +342,8 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { hinfo->use++; @@ -821,10 +819,9 @@ static int dl_seq_show(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; - struct hlist_node *pos; if (!hlist_empty(&htable->hash[*bucket])) { - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + hlist_for_each_entry(ent, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) return -1; } @@ -877,7 +874,6 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); @@ -890,7 +886,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net) if (pde == NULL) pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) remove_proc_entry(hinfo->pde->name, pde); hashlimit_net->ipt_hashlimit = NULL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8097b4f3ead4..1e3fd5bfcd86 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -248,11 +248,10 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; - struct hlist_node *node; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); - sk_for_each(sk, node, head) { + sk_for_each(sk, head) { if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; @@ -312,9 +311,9 @@ static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) for (i = 0; i <= omask; i++) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; - sk_for_each_safe(sk, node, tmp, &otable[i]) + sk_for_each_safe(sk, tmp, &otable[i]) __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } @@ -344,7 +343,6 @@ static void netlink_update_listeners(struct sock *sk) { struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - struct hlist_node *node; unsigned long mask; unsigned int i; struct listeners *listeners; @@ -355,7 +353,7 @@ netlink_update_listeners(struct sock *sk) for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) { + sk_for_each_bound(sk, &tbl->mc_list) { if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } @@ -371,18 +369,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; - struct hlist_node *node; int len; netlink_table_grab(); head = nl_portid_hashfn(hash, portid); len = 0; - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } - if (node) + if (osk) goto err; err = -EBUSY; @@ -575,7 +572,6 @@ static int netlink_autobind(struct socket *sock) struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; - struct hlist_node *node; s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; @@ -584,7 +580,7 @@ retry: cond_resched(); netlink_table_grab(); head = nl_portid_hashfn(hash, portid); - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (!net_eq(sock_net(osk), net)) continue; if (nlk_sk(osk)->portid == portid) { @@ -1101,7 +1097,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; - struct hlist_node *node; struct sock *sk; skb = netlink_trim(skb, allocation); @@ -1124,7 +1119,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid netlink_lock_table(); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); consume_skb(skb); @@ -1200,7 +1195,6 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; - struct hlist_node *node; struct sock *sk; int ret = 0; @@ -1212,7 +1206,7 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) read_lock(&nl_table_lock); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); @@ -1676,10 +1670,9 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; - struct hlist_node *node; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - sk_for_each_bound(sk, node, &tbl->mc_list) + sk_for_each_bound(sk, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } @@ -1974,14 +1967,13 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) struct nl_seq_iter *iter = seq->private; int i, j; struct sock *s; - struct hlist_node *node; loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, node, &hash->table[j]) { + sk_for_each(s, &hash->table[j]) { if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 297b07a029de..d1fa1d9ffd2e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -104,10 +104,9 @@ static void nr_remove_socket(struct sock *sk) static void nr_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (nr_sk(s)->device == dev) nr_disconnect(s, ENETUNREACH); spin_unlock_bh(&nr_list_lock); @@ -149,10 +148,9 @@ static void nr_insert_socket(struct sock *sk) static struct sock *nr_find_listener(ax25_address *addr) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { bh_lock_sock(s); @@ -170,10 +168,9 @@ found: static struct sock *nr_find_socket(unsigned char index, unsigned char id) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { @@ -194,10 +191,9 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, ax25_address *dest) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->your_index == index && nr->your_id == id && diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70ffff76a967..b976d5eff2de 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -49,10 +49,9 @@ static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, &nr_node_list) if (ax25cmp(callsign, &nr_node->callsign) == 0) { nr_node_hold(nr_node); found = nr_node; @@ -67,10 +66,9 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; - struct hlist_node *node; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) + nr_neigh_for_each(nr_neigh, &nr_neigh_list) if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) { nr_neigh_hold(nr_neigh); @@ -114,10 +112,9 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, */ if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { struct nr_node *nr_nodet; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_nodet, node, &nr_node_list) { + nr_node_for_each(nr_nodet, &nr_node_list) { nr_node_lock(nr_nodet); for (i = 0; i < nr_nodet->count; i++) if (nr_nodet->routes[i].neighbour == nr_neigh) @@ -485,11 +482,11 @@ static int nr_dec_obs(void) { struct nr_neigh *nr_neigh; struct nr_node *s; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; int i; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(s, node, nodet, &nr_node_list) { + nr_node_for_each_safe(s, nodet, &nr_node_list) { nr_node_lock(s); for (i = 0; i < s->count; i++) { switch (s->routes[i].obs_count) { @@ -540,15 +537,15 @@ static int nr_dec_obs(void) void nr_rt_device_down(struct net_device *dev) { struct nr_neigh *s; - struct hlist_node *node, *nodet, *node2, *node2t; + struct hlist_node *nodet, *node2t; struct nr_node *t; int i; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { if (s->dev == dev) { spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2, node2t, &nr_node_list) { + nr_node_for_each_safe(t, node2t, &nr_node_list) { nr_node_lock(t); for (i = 0; i < t->count; i++) { if (t->routes[i].neighbour == s) { @@ -737,11 +734,10 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) void nr_link_failed(ax25_cb *ax25, int reason) { struct nr_neigh *s, *nr_neigh = NULL; - struct hlist_node *node; struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) { + nr_neigh_for_each(s, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; @@ -761,7 +757,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) { + nr_node_for_each(nr_node, &nr_node_list) { nr_node_lock(nr_node); if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) @@ -1013,16 +1009,16 @@ void __exit nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node, nodet, &nr_node_list) { + nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); nr_node_unlock(t); } - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { while(s->count) { s->count--; nr_neigh_put(s); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 746f5a2f9804..7f8266dd14cb 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -71,14 +71,14 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct nfc_llcp_sock *llcp_sock; skb_queue_purge(&local->tx_queue); write_lock(&local->sockets.lock); - sk_for_each_safe(sk, node, tmp, &local->sockets.head) { + sk_for_each_safe(sk, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); @@ -171,7 +171,6 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, u8 ssap, u8 dsap) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("ssap dsap %d %d\n", ssap, dsap); @@ -183,7 +182,7 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { @@ -272,7 +271,6 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, u8 *sn, size_t sn_len) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("sn %zd %p\n", sn_len, sn); @@ -284,7 +282,7 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); pr_debug("llcp sock %p\n", tmp_sock); @@ -601,14 +599,13 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction) { - struct hlist_node *node; struct sk_buff *skb_copy = NULL, *nskb; struct sock *sk; u8 *data; read_lock(&local->raw_sockets.lock); - sk_for_each(sk, node, &local->raw_sockets.head) { + sk_for_each(sk, &local->raw_sockets.head) { if (sk->sk_state != LLCP_BOUND) continue; @@ -697,11 +694,10 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - struct hlist_node *node; read_lock(&local->connecting_sockets.lock); - sk_for_each(sk, node, &local->connecting_sockets.head) { + sk_for_each(sk, &local->connecting_sockets.head) { llcp_sock = nfc_llcp_sock(sk); if (llcp_sock->ssap == ssap) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9dc537df46c4..e87a26506dba 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -158,11 +158,10 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; - struct hlist_node *n; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node) { if (vport->port_no == port_no) return vport; } @@ -1386,9 +1385,9 @@ static void __dp_destroy(struct datapath *dp) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *node, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) if (vport->port_no != OVSP_LOCAL) ovs_dp_detach_port(vport); } @@ -1825,10 +1824,9 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *n; j = 0; - hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).portid, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294cebc4f2..20605ecf100b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -299,10 +299,10 @@ void ovs_flow_tbl_destroy(struct flow_table *table) for (i = 0; i < table->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *node, *n; + struct hlist_node *n; int ver = table->node_ver; - hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) { + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del_rcu(&flow->hash_node[ver]); ovs_flow_free(flow); } @@ -332,7 +332,6 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; int ver; int i; @@ -340,7 +339,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la while (*bucket < table->n_buckets) { i = 0; head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; continue; @@ -367,11 +366,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new for (i = 0; i < old->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, n, head, hash_node[old_ver]) + hlist_for_each_entry(flow, head, hash_node[old_ver]) ovs_flow_tbl_insert(new, flow); } old->keep_flows = true; @@ -766,14 +764,13 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, struct sw_flow_key *key, int key_len) { struct sw_flow *flow; - struct hlist_node *n; struct hlist_head *head; u32 hash; hash = ovs_flow_hash(key, key_len); head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && !memcmp(&flow->key, key, key_len)) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0bedbac4..ba717cc038b3 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -86,9 +86,8 @@ struct vport *ovs_vport_locate(struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - struct hlist_node *node; - hlist_for_each_entry_rcu(vport, node, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node) if (!strcmp(name, vport->ops->get_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c7bfeff10767..1d6793dbfbae 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3263,12 +3263,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; - struct hlist_node *node; struct net_device *dev = data; struct net *net = dev_net(dev); rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { diff --git a/net/packet/diag.c b/net/packet/diag.c index 8db6e21c46bd..d3fcd1ebef7e 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -172,13 +172,12 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; - struct hlist_node *node; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); mutex_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + sk_for_each(sk, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 576f22c9c76e..e77411735de8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -640,11 +640,10 @@ static struct sock *pep_find_pipe(const struct hlist_head *hlist, const struct sockaddr_pn *dst, u8 pipe_handle) { - struct hlist_node *node; struct sock *sknode; u16 dobj = pn_sockaddr_get_object(dst); - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct pep_sock *pnnode = pep_sk(sknode); /* Ports match, but addresses might not: */ diff --git a/net/phonet/socket.c b/net/phonet/socket.c index b7e982782255..1afd1381cdc7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -76,7 +76,6 @@ static struct hlist_head *pn_hash_list(u16 obj) */ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { - struct hlist_node *node; struct sock *sknode; struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); @@ -84,7 +83,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct hlist_head *hlist = pn_hash_list(obj); rcu_read_lock(); - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -120,10 +119,9 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) rcu_read_lock(); for (h = 0; h < PN_HASHSIZE; h++) { - struct hlist_node *node; struct sock *sknode; - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct sk_buff *clone; if (!net_eq(sock_net(sknode), net)) @@ -543,12 +541,11 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct hlist_head *hlist = pnsocks.hlist; - struct hlist_node *node; struct sock *sknode; unsigned int h; for (h = 0; h < PN_HASHSIZE; h++) { - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { if (!net_eq(net, sock_net(sknode))) continue; if (!pos) diff --git a/net/rds/bind.c b/net/rds/bind.c index 637bde56c9db..b5ad65a0067e 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -52,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, struct rds_sock *insert) { struct rds_sock *rs; - struct hlist_node *node; struct hlist_head *head = hash_to_bucket(addr, port); u64 cmp; u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); rcu_read_lock(); - hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) { + hlist_for_each_entry_rcu(rs, head, rs_bound_node) { cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | be16_to_cpu(rs->rs_bound_port); diff --git a/net/rds/connection.c b/net/rds/connection.c index 9e07c756d1f9..642ad42c416b 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -69,9 +69,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, struct rds_transport *trans) { struct rds_connection *conn, *ret = NULL; - struct hlist_node *pos; - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (conn->c_faddr == faddr && conn->c_laddr == laddr && conn->c_trans == trans) { ret = conn; @@ -376,7 +375,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, int want_send) { struct hlist_head *head; - struct hlist_node *pos; struct list_head *list; struct rds_connection *conn; struct rds_message *rm; @@ -390,7 +388,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (want_send) list = &conn->c_send_queue; else @@ -439,7 +437,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, { uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; - struct hlist_node *pos; struct rds_connection *conn; size_t i; @@ -450,7 +447,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index b768fe9d5e7a..cf68e6e4054a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -165,10 +165,9 @@ static void rose_remove_socket(struct sock *sk) void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->neighbour == neigh) { @@ -186,10 +185,9 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) static void rose_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->device == dev) { @@ -246,10 +244,9 @@ static void rose_insert_socket(struct sock *sk) static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -258,7 +255,7 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) goto found; } - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -278,10 +275,9 @@ found: struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->lci == lci && rose->neighbour == neigh) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a181b484812a..c297e2a8e2a1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -545,7 +545,7 @@ static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; - struct hlist_node *n, *next; + struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; @@ -564,7 +564,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) sch_tree_lock(sch); for (i = 0; i < osize; i++) { - hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0e19948470b8..13aa47aa2ffb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1041,14 +1041,13 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (q->quanta[prio] == 0) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of * arithmetic overflows! */ @@ -1087,10 +1086,9 @@ static void cbq_sync_defmap(struct cbq_class *cl) continue; for (h = 0; h < q->clhash.hashsize; h++) { - struct hlist_node *n; struct cbq_class *c; - hlist_for_each_entry(c, n, &q->clhash.hash[h], + hlist_for_each_entry(c, &q->clhash.hash[h], common.hnode) { if (c->split == split && c->level < level && c->defmap & (1<active[prio] = NULL; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1697,7 +1694,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) static void cbq_destroy(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct cbq_class *cl; unsigned int h; @@ -1710,11 +1707,11 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], common.hnode) cbq_destroy_class(sch, cl); } @@ -2013,14 +2010,13 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (arg->stop) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 71e50c80315f..759b308d1a8d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -293,14 +293,13 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -451,11 +450,10 @@ static void drr_reset_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) list_del(&cl->alist); qdisc_reset(cl->qdisc); @@ -468,13 +466,13 @@ static void drr_destroy_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) drr_destroy_class(sch, cl); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c2ec4510540..9facea03faeb 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1389,7 +1389,6 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n; struct hfsc_class *cl; unsigned int i; @@ -1397,7 +1396,7 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; @@ -1523,11 +1522,10 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } q->eligible = RB_ROOT; @@ -1540,16 +1538,16 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], cl_common.hnode) hfsc_destroy_class(sch, cl); } @@ -1564,12 +1562,11 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; sch->qstats.backlog = 0; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) sch->qstats.backlog += cl->qdisc->qstats.backlog; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 03c2692ca01e..571f1d211f4d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -949,11 +949,10 @@ static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1218,7 +1217,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct htb_class *cl; unsigned int i; @@ -1232,11 +1231,11 @@ static void htb_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) htb_destroy_class(sch, cl); } @@ -1516,14 +1515,13 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6ed37652a4c3..e9a77f621c3d 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -276,9 +276,8 @@ static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q, u32 lmax, u32 weight) { struct qfq_aggregate *agg; - struct hlist_node *n; - hlist_for_each_entry(agg, n, &q->nonfull_aggs, nonfull_next) + hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next) if (agg->lmax == lmax && agg->class_weight == weight) return agg; @@ -670,14 +669,13 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1376,11 +1374,10 @@ static unsigned int qfq_drop_from_slot(struct qfq_sched *q, struct hlist_head *slot) { struct qfq_aggregate *agg; - struct hlist_node *n; struct qfq_class *cl; unsigned int len; - hlist_for_each_entry(agg, n, slot, next) { + hlist_for_each_entry(agg, slot, next) { list_for_each_entry(cl, &agg->active, alist) { if (!cl->qdisc->ops->drop) @@ -1459,11 +1456,10 @@ static void qfq_reset_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen > 0) qfq_deactivate_class(q, cl); @@ -1477,13 +1473,13 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { qfq_destroy_class(sch, cl); } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 73aad3d16a45..2b3ef03c6098 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -332,7 +332,6 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport *t = NULL; struct sctp_hashbucket *head; struct sctp_ep_common *epb; - struct hlist_node *node; int hash; int rport; @@ -350,7 +349,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { tmp = sctp_assoc(epb); if (tmp->ep != ep || rport != tmp->peer.port) continue; diff --git a/net/sctp/input.c b/net/sctp/input.c index 965bbbbe48d4..4b2c83146aa7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -784,13 +784,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; - struct hlist_node *node; int hash; hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; @@ -876,7 +875,6 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_ep_common *epb; struct sctp_association *asoc; struct sctp_transport *transport; - struct hlist_node *node; int hash; /* Optimize here for direct hit, only listening connections can @@ -886,7 +884,7 @@ static struct sctp_association *__sctp_lookup_association( ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { asoc = sctp_assoc(epb); transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8c19e97262ca..ab3bba8cb0a8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -213,7 +213,6 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) @@ -222,7 +221,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -321,7 +320,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_assoc_hashsize) @@ -330,7 +328,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) head = &sctp_assoc_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -436,7 +434,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; - struct hlist_node *node; struct sctp_transport *tsp; int hash = *(loff_t *)v; @@ -447,7 +444,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); rcu_read_lock(); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) continue; assoc = sctp_assoc(epb); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cedd9bf67b8c..c99458df3f3f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5882,8 +5882,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_bind_hashbucket *head; /* hash list */ - struct sctp_bind_bucket *pp; /* hash list port iterator */ - struct hlist_node *node; + struct sctp_bind_bucket *pp; unsigned short snum; int ret; @@ -5910,7 +5909,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) + sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; @@ -5938,7 +5937,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) { + sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } @@ -5970,7 +5969,7 @@ pp_found: * that this port/socket (sk) combination are already * in an endpoint. */ - sk_for_each_bound(sk2, node, &pp->owner) { + sk_for_each_bound(sk2, &pp->owner) { struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 392adc41e2e5..f5294047df77 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -407,7 +407,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, { LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; unsigned int nr; @@ -415,7 +414,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); rcu_read_lock(); - hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; spin_lock(&cache->lock); @@ -439,7 +438,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, } spin_lock(&cache->lock); - hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; cred = get_rpccred(entry); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f3897d10f649..39a4112faf54 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -670,13 +670,13 @@ static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - struct hlist_node *lp, *tmp; + struct hlist_node *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 7963569fc04f..2af7b0cba43a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -138,13 +138,12 @@ auth_domain_lookup(char *name, struct auth_domain *new) { struct auth_domain *hp; struct hlist_head *head; - struct hlist_node *np; head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; spin_lock(&auth_domain_lock); - hlist_for_each_entry(hp, np, head, hash) { + hlist_for_each_entry(hp, head, hash) { if (strcmp(hp->name, name)==0) { kref_get(&hp->ref); spin_unlock(&auth_domain_lock); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 46754779fd3d..24b167914311 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, static struct name_seq *nametbl_find_seq(u32 type) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *ns; seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { + hlist_for_each_entry(ns, seq_head, ns_list) { if (ns->type == type) return ns; } @@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *seq; int all_types; int ret = 0; @@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, lowbound, upbound, i); @@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, depth, type, diff --git a/net/tipc/node.c b/net/tipc/node.c index 48f39dd3eae8..6e6c434872e8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr) struct tipc_node *tipc_node_find(u32 addr) { struct tipc_node *node; - struct hlist_node *pos; if (unlikely(!in_own_cluster_exact(addr))) return NULL; - hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) return node; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87d284289012..51be64f163ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -263,9 +263,8 @@ static struct sock *__unix_find_socket_byname(struct net *net, int len, int type, unsigned int hash) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -298,10 +297,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net, static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; - struct hlist_node *node; spin_lock(&unix_table_lock); - sk_for_each(s, node, + sk_for_each(s, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; diff --git a/net/unix/diag.c b/net/unix/diag.c index 5ac19dc1d5e4..d591091603bf 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -192,10 +192,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; - struct hlist_node *node; num = 0; - sk_for_each(sk, node, &unix_socket_table[slot]) { + sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -226,9 +225,7 @@ static struct sock *unix_lookup_by_ino(int ino) spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - struct hlist_node *node; - - sk_for_each(sk, node, &unix_socket_table[i]) + sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&unix_table_lock); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a306bc66000e..37ca9694aabe 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -208,11 +208,10 @@ static void x25_remove_socket(struct sock *sk) static void x25_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); @@ -280,12 +279,11 @@ static struct sock *x25_find_listener(struct x25_address *addr, { struct sock *s; struct sock *next_best; - struct hlist_node *node; read_lock_bh(&x25_list_lock); next_best = NULL; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || !strcmp(addr->x25_addr, @@ -323,9 +321,8 @@ found: static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { sock_hold(s); goto found; @@ -1782,11 +1779,10 @@ static struct notifier_block x25_dev_notifier = { void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour == nb) x25_disconnect(s, ENETUNREACH, 0, 0); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5b47180986f8..167c67d46c6a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -379,27 +379,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp, *entry0 = NULL; + struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; redo: - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask); if (!entry0) { - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); h0 = h; } else { if (h != h0) continue; - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_after(entry0, &pol->bydst); } - entry0 = entry; + entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; @@ -411,10 +411,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_policy *pol; - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); @@ -544,7 +544,6 @@ static u32 xfrm_gen_index(struct net *net, int dir) static u32 idx_generator; for (;;) { - struct hlist_node *entry; struct hlist_head *list; struct xfrm_policy *p; u32 idx; @@ -556,7 +555,7 @@ static u32 xfrm_gen_index(struct net *net, int dir) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; - hlist_for_each_entry(p, entry, list, byidx) { + hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; @@ -628,13 +627,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos; + struct hlist_node *newpos; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(policy, pol) && @@ -691,13 +690,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = 0; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && @@ -729,7 +727,6 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) @@ -739,7 +736,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, write_lock_bh(&xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { + hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); @@ -772,10 +769,9 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -789,7 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } } for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -828,11 +824,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; again1: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -852,7 +847,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -980,7 +975,6 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, int err; struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; @@ -992,7 +986,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1008,7 +1002,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -3041,13 +3035,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; @@ -3056,7 +3049,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector } } chain = &init_net.xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && pol->priority < priority) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ae01bdbcb294..2c341bdaf47c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -72,10 +72,10 @@ static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *nspitable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_state *x; - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(x, tmp, list, bydst) { unsigned int h; h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -368,14 +368,14 @@ static void xfrm_state_gc_task(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&net->xfrm.km_waitq); @@ -577,10 +577,9 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int i, err = 0; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -613,10 +612,9 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -685,9 +683,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -710,9 +707,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -798,7 +794,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); unsigned int h, h_wildcard; - struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; @@ -810,7 +805,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -826,7 +821,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -906,11 +901,10 @@ xfrm_stateonly_find(struct net *net, u32 mark, { unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - struct hlist_node *entry; spin_lock(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -972,12 +966,11 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; - struct hlist_node *entry; unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1004,11 +997,10 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - struct hlist_node *entry; struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1215,12 +1207,11 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) { unsigned int h; struct xfrm_state *x; - struct hlist_node *entry; if (m->reqid) { h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1237,7 +1228,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } else { h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1466,10 +1457,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s int i; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index 55a6271bce7a..ff63fe00c195 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -45,12 +45,11 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value) { struct ima_queue_entry *qe, *ret = NULL; unsigned int key; - struct hlist_node *pos; int rc; key = ima_hash_key(digest_value); rcu_read_lock(); - hlist_for_each_entry_rcu(qe, pos, &ima_htable.queue[key], hnext) { + hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) { rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE); if (rc == 0) { ret = qe; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 4d3fab47e643..dad36a6ab45f 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -188,11 +188,9 @@ int avc_get_hash_stats(char *page) for (i = 0; i < AVC_CACHE_SLOTS; i++) { head = &avc_cache.slots[i]; if (!hlist_empty(head)) { - struct hlist_node *next; - slots_used++; chain_len = 0; - hlist_for_each_entry_rcu(node, next, head, list) + hlist_for_each_entry_rcu(node, head, list) chain_len++; if (chain_len > max_chain_len) max_chain_len = chain_len; @@ -241,7 +239,6 @@ static inline int avc_reclaim_node(void) int hvalue, try, ecx; unsigned long flags; struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { @@ -253,7 +250,7 @@ static inline int avc_reclaim_node(void) continue; rcu_read_lock(); - hlist_for_each_entry(node, next, head, list) { + hlist_for_each_entry(node, head, list) { avc_node_delete(node); avc_cache_stats_incr(reclaims); ecx++; @@ -301,11 +298,10 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) struct avc_node *node, *ret = NULL; int hvalue; struct hlist_head *head; - struct hlist_node *next; hvalue = avc_hash(ssid, tsid, tclass); head = &avc_cache.slots[hvalue]; - hlist_for_each_entry_rcu(node, next, head, list) { + hlist_for_each_entry_rcu(node, head, list) { if (ssid == node->ae.ssid && tclass == node->ae.tclass && tsid == node->ae.tsid) { @@ -394,7 +390,6 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec node = avc_alloc_node(); if (node) { struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; hvalue = avc_hash(ssid, tsid, tclass); @@ -404,7 +399,7 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec lock = &avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); - hlist_for_each_entry(pos, next, head, list) { + hlist_for_each_entry(pos, head, list) { if (pos->ae.ssid == ssid && pos->ae.tsid == tsid && pos->ae.tclass == tclass) { @@ -541,7 +536,6 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, unsigned long flag; struct avc_node *pos, *node, *orig = NULL; struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; node = avc_alloc_node(); @@ -558,7 +552,7 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, spin_lock_irqsave(lock, flag); - hlist_for_each_entry(pos, next, head, list) { + hlist_for_each_entry(pos, head, list) { if (ssid == pos->ae.ssid && tsid == pos->ae.tsid && tclass == pos->ae.tclass && @@ -614,7 +608,6 @@ out: static void avc_flush(void) { struct hlist_head *head; - struct hlist_node *next; struct avc_node *node; spinlock_t *lock; unsigned long flag; @@ -630,7 +623,7 @@ static void avc_flush(void) * prevent RCU grace periods from ending. */ rcu_read_lock(); - hlist_for_each_entry(node, next, head, list) + hlist_for_each_entry(node, head, list) avc_node_delete(node); rcu_read_unlock(); spin_unlock_irqrestore(lock, flag); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bc4ad7977438..c8be0fbc5145 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -314,7 +314,6 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; - struct hlist_node *pos; struct perf_sample_id *sid; int hash; @@ -324,7 +323,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; - hlist_for_each_entry(sid, pos, head, node) + hlist_for_each_entry(sid, head, node) if (sid->id == id) return sid->evsel; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b6eea5cc7b34..adb17f266b28 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -268,14 +268,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; - struct hlist_node *n; if (irqfd->gsi >= irq_rt->nr_rt_entries) { rcu_assign_pointer(irqfd->irq_entry, NULL); return; } - hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) { + hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) rcu_assign_pointer(irqfd->irq_entry, e); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ff6d40e2c06d..e9073cf4d040 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -173,7 +173,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; - struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -184,7 +183,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) + hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; rcu_read_unlock(); @@ -212,7 +211,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; - struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -227,7 +225,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { + hlist_for_each_entry(e, &irq_rt->map[irq], link) { if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); else @@ -241,13 +239,12 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; int gsi; rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) { rcu_read_unlock(); @@ -263,7 +260,6 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; int gsi; trace_kvm_ack_irq(irqchip, pin); @@ -271,7 +267,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); @@ -369,13 +365,12 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - struct hlist_node *n; int gsi; rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) + hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); rcu_read_unlock(); @@ -396,13 +391,12 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, int delta; unsigned max_pin; struct kvm_kernel_irq_routing_entry *ei; - struct hlist_node *n; /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and MSI. */ - hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) if (ei->type == KVM_IRQ_ROUTING_MSI || ue->type == KVM_IRQ_ROUTING_MSI || ue->u.irqchip.irqchip == ei->irqchip.irqchip) -- cgit v1.2.3 From f9c6a655a94042f94c0adb30d07d93cfd8915e95 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Feb 2013 21:36:03 +0000 Subject: dmaengine: dw_dmac: move to generic DMA binding The original device tree binding for this driver, from Viresh Kumar unfortunately conflicted with the generic DMA binding, and did not allow to completely seperate slave device configuration from the controller. This is an attempt to replace it with an implementation of the generic binding, but it is currently completely untested, because I do not have any hardware with this particular controller. The patch applies on top of the slave-dma tree, which contains both the base support for the generic DMA binding, as well as the earlier attempt from Viresh. Both of these are currently not merged upstream however. This version incorporates feedback from Viresh Kumar, Andy Shevchenko and Russell King. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Acked-by: Andy Shevchenko Cc: Vinod Koul Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/snps-dma.txt | 70 +++++----- drivers/dma/dw_dmac.c | 145 ++++++++++----------- drivers/dma/dw_dmac_regs.h | 7 +- include/linux/dw_dmac.h | 5 - 4 files changed, 111 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt index 5bb3dfb6f1d8..d58675ea1abf 100644 --- a/Documentation/devicetree/bindings/dma/snps-dma.txt +++ b/Documentation/devicetree/bindings/dma/snps-dma.txt @@ -3,59 +3,61 @@ Required properties: - compatible: "snps,dma-spear1340" - reg: Address range of the DMAC registers -- interrupt-parent: Should be the phandle for the interrupt controller - that services interrupts for this device - interrupt: Should contain the DMAC interrupt number -- nr_channels: Number of channels supported by hardware -- is_private: The device channels should be marked as private and not for by the - general purpose DMA channel allocator. False if not passed. +- dma-channels: Number of channels supported by hardware +- dma-requests: Number of DMA request lines supported, up to 16 +- dma-masters: Number of AHB masters supported by the controller +- #dma-cells: must be <3> - chan_allocation_order: order of allocation of channel, 0 (default): ascending, 1: descending - chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1: increase from chan n->0 - block_size: Maximum block size supported by the controller -- nr_masters: Number of AHB masters supported by the controller - data_width: Maximum data width supported by hardware per AHB master (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) -- slave_info: - - bus_id: name of this device channel, not just a device name since - devices may have more than one channel e.g. "foo_tx". For using the - dw_generic_filter(), slave drivers must pass exactly this string as - param to filter function. - - cfg_hi: Platform-specific initializer for the CFG_HI register - - cfg_lo: Platform-specific initializer for the CFG_LO register - - src_master: src master for transfers on allocated channel. - - dst_master: dest master for transfers on allocated channel. + + +Optional properties: +- interrupt-parent: Should be the phandle for the interrupt controller + that services interrupts for this device +- is_private: The device channels should be marked as private and not for by the + general purpose DMA channel allocator. False if not passed. Example: - dma@fc000000 { + dmahost: dma@fc000000 { compatible = "snps,dma-spear1340"; reg = <0xfc000000 0x1000>; interrupt-parent = <&vic1>; interrupts = <12>; - nr_channels = <8>; + dma-channels = <8>; + dma-requests = <16>; + dma-masters = <2>; + #dma-cells = <3>; chan_allocation_order = <1>; chan_priority = <1>; block_size = <0xfff>; - nr_masters = <2>; data_width = <3 3 0 0>; + }; - slave_info { - uart0-tx { - bus_id = "uart0-tx"; - cfg_hi = <0x4000>; /* 0x8 << 11 */ - cfg_lo = <0>; - src_master = <0>; - dst_master = <1>; - }; - spi0-tx { - bus_id = "spi0-tx"; - cfg_hi = <0x2000>; /* 0x4 << 11 */ - cfg_lo = <0>; - src_master = <0>; - dst_master = <0>; - }; - }; +DMA clients connected to the Designware DMA controller must use the format +described in the dma.txt file, using a four-cell specifier for each channel. +The four cells in order are: + +1. A phandle pointing to the DMA controller +2. The DMA request line number +3. Source master for transfers on allocated channel +4. Destination master for transfers on allocated channel + +Example: + + serial@e0000000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0xe0000000 0x1000>; + interrupts = <0 35 0x4>; + status = "disabled"; + dmas = <&dmahost 12 0 1>, + <&dmahost 13 0 1 0>; + dma-names = "rx", "rx"; }; diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 4c83f18803f1..c3159abf9ac1 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -170,7 +171,13 @@ static void dwc_initialize(struct dw_dma_chan *dwc) if (dwc->initialized == true) return; - if (dws) { + if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) { + /* autoconfigure based on request line from DT */ + if (dwc->direction == DMA_MEM_TO_DEV) + cfghi = DWC_CFGH_DST_PER(dwc->request_line); + else if (dwc->direction == DMA_DEV_TO_MEM) + cfghi = DWC_CFGH_SRC_PER(dwc->request_line); + } else if (dws) { /* * We need controller-specific data to set up slave * transfers. @@ -1225,49 +1232,64 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -bool dw_dma_generic_filter(struct dma_chan *chan, void *param) +struct dw_dma_filter_args { + struct dw_dma *dw; + unsigned int req; + unsigned int src; + unsigned int dst; +}; + +static bool dw_dma_generic_filter(struct dma_chan *chan, void *param) { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - static struct dw_dma *last_dw; - static char *last_bus_id; - int i = -1; + struct dw_dma_filter_args *fargs = param; + struct dw_dma_slave *dws = &dwc->slave; - /* - * dmaengine framework calls this routine for all channels of all dma - * controller, until true is returned. If 'param' bus_id is not - * registered with a dma controller (dw), then there is no need of - * running below function for all channels of dw. - * - * This block of code does this by saving the parameters of last - * failure. If dw and param are same, i.e. trying on same dw with - * different channel, return false. - */ - if ((last_dw == dw) && (last_bus_id == param)) - return false; - /* - * Return true: - * - If dw_dma's platform data is not filled with slave info, then all - * dma controllers are fine for transfer. - * - Or if param is NULL - */ - if (!dw->sd || !param) - return true; + /* ensure the device matches our channel */ + if (chan->device != &fargs->dw->dma) + return false; - while (++i < dw->sd_count) { - if (!strcmp(dw->sd[i].bus_id, param)) { - chan->private = &dw->sd[i]; - last_dw = NULL; - last_bus_id = NULL; + dws->dma_dev = dw->dma.dev; + dws->cfg_hi = ~0; + dws->cfg_lo = ~0; + dws->src_master = fargs->src; + dws->dst_master = fargs->dst; - return true; - } - } + dwc->request_line = fargs->req; - last_dw = dw; - last_bus_id = param; - return false; + chan->private = dws; + + return true; +} + +static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_dma *dw = ofdma->of_dma_data; + struct dw_dma_filter_args fargs = { + .dw = dw, + }; + dma_cap_mask_t cap; + + if (dma_spec->args_count != 3) + return NULL; + + fargs.req = be32_to_cpup(dma_spec->args+0); + fargs.src = be32_to_cpup(dma_spec->args+1); + fargs.dst = be32_to_cpup(dma_spec->args+2); + + if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || + fargs.src >= dw->nr_masters || + fargs.dst >= dw->nr_masters)) + return NULL; + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + /* TODO: there should be a simpler way to do this */ + return dma_request_channel(cap, dw_dma_generic_filter, &fargs); } -EXPORT_SYMBOL(dw_dma_generic_filter); /* --------------------- Cyclic DMA API extensions -------------------- */ @@ -1553,9 +1575,8 @@ static void dw_dma_off(struct dw_dma *dw) static struct dw_dma_platform_data * dw_dma_parse_dt(struct platform_device *pdev) { - struct device_node *sn, *cn, *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; - struct dw_dma_slave *sd; u32 tmp, arr[4]; if (!np) { @@ -1567,7 +1588,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; - if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels)) + if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) return NULL; if (of_property_read_bool(np, "is_private")) @@ -1582,7 +1603,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "nr_masters", &tmp)) { + if (!of_property_read_u32(np, "dma-masters", &tmp)) { if (tmp > 4) return NULL; @@ -1594,36 +1615,6 @@ dw_dma_parse_dt(struct platform_device *pdev) for (tmp = 0; tmp < pdata->nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; - /* parse slave data */ - sn = of_find_node_by_name(np, "slave_info"); - if (!sn) - return pdata; - - /* calculate number of slaves */ - tmp = of_get_child_count(sn); - if (!tmp) - return NULL; - - sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL); - if (!sd) - return NULL; - - pdata->sd = sd; - pdata->sd_count = tmp; - - for_each_child_of_node(sn, cn) { - sd->dma_dev = &pdev->dev; - of_property_read_string(cn, "bus_id", &sd->bus_id); - of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi); - of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo); - if (!of_property_read_u32(cn, "src_master", &tmp)) - sd->src_master = tmp; - - if (!of_property_read_u32(cn, "dst_master", &tmp)) - sd->dst_master = tmp; - sd++; - } - return pdata; } #else @@ -1704,8 +1695,6 @@ static int dw_probe(struct platform_device *pdev) clk_prepare_enable(dw->clk); dw->regs = regs; - dw->sd = pdata->sd; - dw->sd_count = pdata->sd_count; /* get hardware configuration parameters */ if (autocfg) { @@ -1836,6 +1825,14 @@ static int dw_probe(struct platform_device *pdev) dma_async_device_register(&dw->dma); + if (pdev->dev.of_node) { + err = of_dma_controller_register(pdev->dev.of_node, + dw_dma_xlate, dw); + if (err && err != -ENODEV) + dev_err(&pdev->dev, + "could not register of_dma_controller\n"); + } + return 0; } @@ -1844,6 +1841,8 @@ static int __devexit dw_remove(struct platform_device *pdev) struct dw_dma *dw = platform_get_drvdata(pdev); struct dw_dma_chan *dwc, *_dwc; + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dw_dma_off(dw); dma_async_device_unregister(&dw->dma); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 88dd8eb31957..cf0ce5c77d60 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -13,6 +13,7 @@ #include #define DW_DMA_MAX_NR_CHANNELS 8 +#define DW_DMA_MAX_NR_REQUESTS 16 /* flow controller */ enum dw_dma_fc { @@ -211,6 +212,8 @@ struct dw_dma_chan { /* hardware configuration */ unsigned int block_size; bool nollp; + unsigned int request_line; + struct dw_dma_slave slave; /* configuration passed via DMA_SLAVE_CONFIG */ struct dma_slave_config dma_sconfig; @@ -239,10 +242,6 @@ struct dw_dma { struct tasklet_struct tasklet; struct clk *clk; - /* slave information */ - struct dw_dma_slave *sd; - unsigned int sd_count; - u8 all_chan_mask; /* hardware configuration */ diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 41766de66e33..481ab2345d6b 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -27,7 +27,6 @@ */ struct dw_dma_slave { struct device *dma_dev; - const char *bus_id; u32 cfg_hi; u32 cfg_lo; u8 src_master; @@ -60,9 +59,6 @@ struct dw_dma_platform_data { unsigned short block_size; unsigned char nr_masters; unsigned char data_width[4]; - - struct dw_dma_slave *sd; - unsigned int sd_count; }; /* bursts size */ @@ -114,6 +110,5 @@ void dw_dma_cyclic_stop(struct dma_chan *chan); dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); -bool dw_dma_generic_filter(struct dma_chan *chan, void *param); #endif /* DW_DMAC_H */ -- cgit v1.2.3 From edddbb1eda61753c886a3c5e159293a7b3a9e30a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 28 Feb 2013 20:30:09 -0500 Subject: SUNRPC: add call to get configured timeout Returns the configured timeout for the xprt of the rpc client. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8da..21d52d0dc15c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); +unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a9f7906c1a6a..09dc0c2b56a3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1195,6 +1195,21 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_max_payload); +/** + * rpc_get_timeout - Get timeout for transport in tenths of seconds + * @clnt: RPC client to query + */ +unsigned long rpc_get_timeout(struct rpc_clnt *clnt) +{ + unsigned long ret; + + rcu_read_lock(); + ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval; + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_get_timeout); + /** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind -- cgit v1.2.3 From 3000512137602b84d1ad5fd89d62984993a19bb6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 28 Feb 2013 20:30:10 -0500 Subject: NFSv4.1: LAYOUTGET EDELAY loops timeout to the MDS The client will currently try LAYOUTGETs forever if a server is returning NFS4ERR_LAYOUTTRYLATER or NFS4ERR_RECALLCONFLICT - even if the client no longer needs the layout (ie process killed, unmounted). This patch uses the DS timeout value (module parameter 'dataserver_timeo' via rpc layer) to set an upper limit of how long the client tries LATOUTGETs in this situation. Once the timeout is reached, IO is redirected to the MDS. This also changes how the client checks if a layout is on the clp list to avoid a double list_add. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- fs/nfs/pnfs.c | 7 +++---- include/linux/nfs_xdr.h | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 37eb38566fbc..b2671cb0f901 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -93,6 +93,8 @@ static int nfs4_map_errors(int err) return err; switch (err) { case -NFS4ERR_RESOURCE: + case -NFS4ERR_LAYOUTTRYLATER: + case -NFS4ERR_RECALLCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: return -EPERM; @@ -6046,6 +6048,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; struct nfs4_state *state = NULL; + unsigned long timeo, giveup; dprintk("--> %s\n", __func__); @@ -6057,7 +6060,10 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) goto out; case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: - task->tk_status = -NFS4ERR_DELAY; + timeo = rpc_get_timeout(task->tk_client); + giveup = lgp->args.timestamp + timeo; + if (time_after(giveup, jiffies)) + task->tk_status = -NFS4ERR_DELAY; break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: @@ -6178,6 +6184,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) return ERR_PTR(-ENOMEM); } lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->args.timestamp = jiffies; lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 97767c8683f9..48ac5aad6258 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1181,7 +1181,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; - bool first = false; + bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1215,10 +1215,9 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; atomic_inc(&lo->plh_outstanding); - if (list_empty(&lo->plh_segs)) - first = true; - + first = list_empty(&lo->plh_layouts) ? true : false; spin_unlock(&ino->i_lock); + if (first) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 29adb12c7ecf..2250cab6fc4b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget_args { struct inode *inode; struct nfs_open_context *ctx; nfs4_stateid stateid; + unsigned long timestamp; struct nfs4_layoutdriver_data layout; }; -- cgit v1.2.3 From 1a71fb84fda651105e1e194c2d3a3a13a38210a9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 27 Sep 2011 22:21:37 +0200 Subject: rtc: stmp3xxx: add wdt-accessor function This RTC also includes a watchdog timer. Provide an accessor function for setting the watchdog timeout value which will be picked up by a watchdog driver. Also register the platform_device for the watchdog here to get the boot-time dependencies right. Signed-off-by: Wolfram Sang Acked-by: Andrew Morton Signed-off-by: Wim Van Sebroeck --- drivers/rtc/rtc-stmp3xxx.c | 64 ++++++++++++++++++++++++++++++++++++++++ include/linux/stmp3xxx_rtc_wdt.h | 15 ++++++++++ 2 files changed, 79 insertions(+) create mode 100644 include/linux/stmp3xxx_rtc_wdt.h (limited to 'include/linux') diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index b2a8ed99b2bf..98f0d3c30738 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include @@ -36,6 +38,7 @@ #define STMP3XXX_RTC_CTRL_ALARM_IRQ_EN 0x00000001 #define STMP3XXX_RTC_CTRL_ONEMSEC_IRQ_EN 0x00000002 #define STMP3XXX_RTC_CTRL_ALARM_IRQ 0x00000004 +#define STMP3XXX_RTC_CTRL_WATCHDOGEN 0x00000010 #define STMP3XXX_RTC_STAT 0x10 #define STMP3XXX_RTC_STAT_STALE_SHIFT 16 @@ -45,6 +48,8 @@ #define STMP3XXX_RTC_ALARM 0x40 +#define STMP3XXX_RTC_WATCHDOG 0x50 + #define STMP3XXX_RTC_PERSISTENT0 0x60 #define STMP3XXX_RTC_PERSISTENT0_SET 0x64 #define STMP3XXX_RTC_PERSISTENT0_CLR 0x68 @@ -52,12 +57,70 @@ #define STMP3XXX_RTC_PERSISTENT0_ALARM_EN 0x00000004 #define STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE 0x00000080 +#define STMP3XXX_RTC_PERSISTENT1 0x70 +/* missing bitmask in headers */ +#define STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER 0x80000000 + struct stmp3xxx_rtc_data { struct rtc_device *rtc; void __iomem *io; int irq_alarm; }; +#if IS_ENABLED(CONFIG_STMP3XXX_RTC_WATCHDOG) +/** + * stmp3xxx_wdt_set_timeout - configure the watchdog inside the STMP3xxx RTC + * @dev: the parent device of the watchdog (= the RTC) + * @timeout: the desired value for the timeout register of the watchdog. + * 0 disables the watchdog + * + * The watchdog needs one register and two bits which are in the RTC domain. + * To handle the resource conflict, the RTC driver will create another + * platform_device for the watchdog driver as a child of the RTC device. + * The watchdog driver is passed the below accessor function via platform_data + * to configure the watchdog. Locking is not needed because accessing SET/CLR + * registers is atomic. + */ + +static void stmp3xxx_wdt_set_timeout(struct device *dev, u32 timeout) +{ + struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (timeout) { + writel(timeout, rtc_data->io + STMP3XXX_RTC_WATCHDOG); + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_SET); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_SET); + } else { + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_CLR); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_CLR); + } +} + +static struct stmp3xxx_wdt_pdata wdt_pdata = { + .wdt_set_timeout = stmp3xxx_wdt_set_timeout, +}; + +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ + struct platform_device *wdt_pdev = + platform_device_alloc("stmp3xxx_rtc_wdt", rtc_pdev->id); + + if (wdt_pdev) { + wdt_pdev->dev.parent = &rtc_pdev->dev; + wdt_pdev->dev.platform_data = &wdt_pdata; + platform_device_add(wdt_pdev); + } +} +#else +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ +} +#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ + static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) { /* @@ -233,6 +296,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) goto out_irq_alarm; } + stmp3xxx_wdt_register(pdev); return 0; out_irq_alarm: diff --git a/include/linux/stmp3xxx_rtc_wdt.h b/include/linux/stmp3xxx_rtc_wdt.h new file mode 100644 index 000000000000..1dd12c96231b --- /dev/null +++ b/include/linux/stmp3xxx_rtc_wdt.h @@ -0,0 +1,15 @@ +/* + * stmp3xxx_rtc_wdt.h + * + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This file is released under the GPLv2. + */ +#ifndef __LINUX_STMP3XXX_RTC_WDT_H +#define __LINUX_STMP3XXX_RTC_WDT_H + +struct stmp3xxx_wdt_pdata { + void (*wdt_set_timeout)(struct device *dev, u32 timeout); +}; + +#endif /* __LINUX_STMP3XXX_RTC_WDT_H */ -- cgit v1.2.3 From f82dedf812ecdf0c19c6c240e85a4a487ab62016 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 24 Jan 2013 18:13:34 +0100 Subject: watchdog: bcm47xx_wdt.c: use platform device Instead of accessing the function to set the watchdog timer directly, register a platform driver the platform could register to use this watchdog driver. Signed-off-by: Hauke Mehrtens Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm47xx_wdt.c | 156 ++++++++++++++++++++--------------------- include/linux/bcm47xx_wdt.h | 9 +++ 2 files changed, 87 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index 4c520d68397e..97ccfce0dabb 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -3,6 +3,7 @@ * * Copyright (C) 2008 Aleksandar Radovanovic * Copyright (C) 2009 Matthieu CASTET + * Copyright (C) 2012-2013 Hauke Mehrtens * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -12,19 +13,19 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include #include #include #include +#include #include #include #include #include #include -#include -#include #define DRV_NAME "bcm47xx_wdt" @@ -43,48 +44,19 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -static struct timer_list wdt_timer; -static atomic_t ticks; - -static inline void bcm47xx_wdt_hw_start(void) +static inline struct bcm47xx_wdt *bcm47xx_wdt_get(struct watchdog_device *wdd) { - /* this is 2,5s on 100Mhz clock and 2s on 133 Mhz */ - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0xfffffff); - break; -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, - 0xfffffff); - break; -#endif - } + return container_of(wdd, struct bcm47xx_wdt, wdd); } -static inline int bcm47xx_wdt_hw_stop(void) +static void bcm47xx_timer_tick(unsigned long data) { - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - return ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0); -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0); - return 0; -#endif - } - return -EINVAL; -} + struct bcm47xx_wdt *wdt = (struct bcm47xx_wdt *)data; + u32 next_tick = min(wdt->wdd.timeout * 1000, wdt->max_timer_ms); -static void bcm47xx_timer_tick(unsigned long unused) -{ - if (!atomic_dec_and_test(&ticks)) { - bcm47xx_wdt_hw_start(); - mod_timer(&wdt_timer, jiffies + HZ); + if (!atomic_dec_and_test(&wdt->soft_ticks)) { + wdt->timer_set_ms(wdt, next_tick); + mod_timer(&wdt->soft_timer, jiffies + HZ); } else { pr_crit("Watchdog will fire soon!!!\n"); } @@ -92,23 +64,29 @@ static void bcm47xx_timer_tick(unsigned long unused) static int bcm47xx_wdt_keepalive(struct watchdog_device *wdd) { - atomic_set(&ticks, wdt_time); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + atomic_set(&wdt->soft_ticks, wdd->timeout); return 0; } static int bcm47xx_wdt_start(struct watchdog_device *wdd) { - bcm47xx_wdt_pet(); - bcm47xx_timer_tick(0); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + bcm47xx_wdt_keepalive(wdd); + bcm47xx_timer_tick((unsigned long)wdt); return 0; } static int bcm47xx_wdt_stop(struct watchdog_device *wdd) { - del_timer_sync(&wdt_timer); - bcm47xx_wdt_hw_stop(); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + del_timer_sync(&wdt->soft_timer); + wdt->timer_set(wdt, 0); return 0; } @@ -116,10 +94,13 @@ static int bcm47xx_wdt_stop(struct watchdog_device *wdd) static int bcm47xx_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_time) { - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) + if (new_time < 1 || new_time > WDT_MAX_TIME) { + pr_warn("timeout value must be 1<=x<=%d, using %d\n", + WDT_MAX_TIME, new_time); return -EINVAL; + } - wdt_time = new_time; + wdd->timeout = new_time; return 0; } @@ -133,8 +114,11 @@ static const struct watchdog_info bcm47xx_wdt_info = { static int bcm47xx_wdt_notify_sys(struct notifier_block *this, unsigned long code, void *unused) { + struct bcm47xx_wdt *wdt; + + wdt = container_of(this, struct bcm47xx_wdt, notifier); if (code == SYS_DOWN || code == SYS_HALT) - bcm47xx_wdt_stop(); + wdt->wdd.ops->stop(&wdt->wdd); return NOTIFY_DONE; } @@ -146,56 +130,72 @@ static struct watchdog_ops bcm47xx_wdt_ops = { .set_timeout = bcm47xx_wdt_set_timeout, }; -static struct watchdog_device bcm47xx_wdt_wdd = { - .info = &bcm47xx_wdt_info, - .ops = &bcm47xx_wdt_ops, -}; - -static struct notifier_block bcm47xx_wdt_notifier = { - .notifier_call = bcm47xx_wdt_notify_sys, -}; - -static int __init bcm47xx_wdt_init(void) +static int bcm47xx_wdt_probe(struct platform_device *pdev) { int ret; + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); - if (bcm47xx_wdt_hw_stop() < 0) - return -ENODEV; + if (!wdt) + return -ENXIO; - setup_timer(&wdt_timer, bcm47xx_timer_tick, 0L); + setup_timer(&wdt->soft_timer, bcm47xx_timer_tick, + (long unsigned int)wdt); - if (bcm47xx_wdt_settimeout(wdt_time)) { - bcm47xx_wdt_settimeout(WDT_DEFAULT_TIME); - pr_info("wdt_time value must be 0 < wdt_time < %d, using %d\n", - (WDT_MAX_TIME + 1), wdt_time); - } - watchdog_set_nowayout(&bcm47xx_wdt_wdd, nowayout); + wdt->wdd.ops = &bcm47xx_wdt_ops; + wdt->wdd.info = &bcm47xx_wdt_info; + wdt->wdd.timeout = WDT_DEFAULT_TIME; + ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout); + if (ret) + goto err_timer; + watchdog_set_nowayout(&wdt->wdd, nowayout); + + wdt->notifier.notifier_call = &bcm47xx_wdt_notify_sys; - ret = register_reboot_notifier(&bcm47xx_wdt_notifier); + ret = register_reboot_notifier(&wdt->notifier); if (ret) - return ret; + goto err_timer; - ret = watchdog_register_device(&bcm47xx_wdt_wdd); - if (ret) { - unregister_reboot_notifier(&bcm47xx_wdt_notifier); - return ret; - } + ret = watchdog_register_device(&wdt->wdd); + if (ret) + goto err_notifier; pr_info("BCM47xx Watchdog Timer enabled (%d seconds%s)\n", wdt_time, nowayout ? ", nowayout" : ""); return 0; + +err_notifier: + unregister_reboot_notifier(&wdt->notifier); +err_timer: + del_timer_sync(&wdt->soft_timer); + + return ret; } -static void __exit bcm47xx_wdt_exit(void) +static int bcm47xx_wdt_remove(struct platform_device *pdev) { - watchdog_unregister_device(&bcm47xx_wdt_wdd); + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); + + if (!wdt) + return -ENXIO; + + watchdog_unregister_device(&wdt->wdd); + unregister_reboot_notifier(&wdt->notifier); - unregister_reboot_notifier(&bcm47xx_wdt_notifier); + return 0; } -module_init(bcm47xx_wdt_init); -module_exit(bcm47xx_wdt_exit); +static struct platform_driver bcm47xx_wdt_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "bcm47xx-wdt", + }, + .probe = bcm47xx_wdt_probe, + .remove = bcm47xx_wdt_remove, +}; + +module_platform_driver(bcm47xx_wdt_driver); MODULE_AUTHOR("Aleksandar Radovanovic"); +MODULE_AUTHOR("Hauke Mehrtens "); MODULE_DESCRIPTION("Watchdog driver for Broadcom BCM47xx"); MODULE_LICENSE("GPL"); diff --git a/include/linux/bcm47xx_wdt.h b/include/linux/bcm47xx_wdt.h index e5dfc256485b..b708786d4cbf 100644 --- a/include/linux/bcm47xx_wdt.h +++ b/include/linux/bcm47xx_wdt.h @@ -1,7 +1,10 @@ #ifndef LINUX_BCM47XX_WDT_H_ #define LINUX_BCM47XX_WDT_H_ +#include +#include #include +#include struct bcm47xx_wdt { @@ -10,6 +13,12 @@ struct bcm47xx_wdt { u32 max_timer_ms; void *driver_data; + + struct watchdog_device wdd; + struct notifier_block notifier; + + struct timer_list soft_timer; + atomic_t soft_ticks; }; static inline void *bcm47xx_wdt_get_drvdata(struct bcm47xx_wdt *wdt) -- cgit v1.2.3 From 3048253ed957fc6cdc34599178408559aa1e0062 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 8 Jan 2013 11:04:10 +0100 Subject: watchdog: core: dt: add support for the timeout-sec dt property Add support for watchdog drivers to initialize/set the timeout field of the watchdog_device structure. The timeout field is initialised either with the module timeout parameter value (if valid) or with the timeout-sec dt property (if valid). If both are invalid the initial value is unchanged. Signed-off-by: Fabio Porcedda Acked-by: Nicolas Ferre Signed-off-by: Wim Van Sebroeck --- Documentation/watchdog/watchdog-kernel-api.txt | 14 +++++- drivers/watchdog/watchdog_core.c | 66 ++++++++++++++++++++++---- drivers/watchdog/watchdog_dev.c | 3 +- include/linux/watchdog.h | 9 ++++ 4 files changed, 80 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 086638f6c82d..a0438f3957ca 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -1,6 +1,6 @@ The Linux WatchDog Timer Driver Core kernel API. =============================================== -Last reviewed: 22-May-2012 +Last reviewed: 12-Feb-2013 Wim Van Sebroeck @@ -212,3 +212,15 @@ driver specific data to and a pointer to the data itself. The watchdog_get_drvdata function allows you to retrieve driver specific data. The argument of this function is the watchdog device where you want to retrieve data from. The function returns the pointer to the driver specific data. + +To initialize the timeout field, the following function can be used: + +extern int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev); + +The watchdog_init_timeout function allows you to initialize the timeout field +using the module timeout parameter or by retrieving the timeout-sec property from +the device tree (if the module timeout parameter is invalid). Best practice is +to set the default timeout value as timeout value in the watchdog_device and +then use this function to set the user "preferred" timeout value. +This routine returns zero on success and a negative errno code for failure. diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 3796434991fa..05d18b4c661b 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -36,12 +36,68 @@ #include /* For __init/__exit/... */ #include /* For ida_* macros */ #include /* For IS_ERR macros */ +#include /* For of_get_timeout_sec */ #include "watchdog_core.h" /* For watchdog_dev_register/... */ static DEFINE_IDA(watchdog_ida); static struct class *watchdog_class; +static void watchdog_check_min_max_timeout(struct watchdog_device *wdd) +{ + /* + * Check that we have valid min and max timeout values, if + * not reset them both to 0 (=not used or unknown) + */ + if (wdd->min_timeout > wdd->max_timeout) { + pr_info("Invalid min and max timeout values, resetting to 0!\n"); + wdd->min_timeout = 0; + wdd->max_timeout = 0; + } +} + +/** + * watchdog_init_timeout() - initialize the timeout field + * @timeout_parm: timeout module parameter + * @dev: Device that stores the timeout-sec property + * + * Initialize the timeout field of the watchdog_device struct with either the + * timeout module parameter (if it is valid value) or the timeout-sec property + * (only if it is a valid value and the timeout_parm is out of bounds). + * If none of them are valid then we keep the old value (which should normally + * be the default timeout value. + * + * A zero is returned on success and -EINVAL for failure. + */ +int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev) +{ + unsigned int t = 0; + int ret = 0; + + watchdog_check_min_max_timeout(wdd); + + /* try to get the tiemout module parameter first */ + if (!watchdog_timeout_invalid(wdd, timeout_parm)) { + wdd->timeout = timeout_parm; + return ret; + } + if (timeout_parm) + ret = -EINVAL; + + /* try to get the timeout_sec property */ + if (dev == NULL || dev->of_node == NULL) + return ret; + of_property_read_u32(dev->of_node, "timeout-sec", &t); + if (!watchdog_timeout_invalid(wdd, t)) + wdd->timeout = t; + else + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(watchdog_init_timeout); + /** * watchdog_register_device() - register a watchdog device * @wdd: watchdog device @@ -63,15 +119,7 @@ int watchdog_register_device(struct watchdog_device *wdd) if (wdd->ops->start == NULL || wdd->ops->stop == NULL) return -EINVAL; - /* - * Check that we have valid min and max timeout values, if - * not reset them both to 0 (=not used or unknown) - */ - if (wdd->min_timeout > wdd->max_timeout) { - pr_info("Invalid min and max timeout values, resetting to 0!\n"); - wdd->min_timeout = 0; - wdd->max_timeout = 0; - } + watchdog_check_min_max_timeout(wdd); /* * Note: now that all watchdog_device data has been verified, we diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index ef8edecfc526..08b48bbf9f4b 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -200,8 +200,7 @@ static int watchdog_set_timeout(struct watchdog_device *wddev, !(wddev->info->options & WDIOF_SETTIMEOUT)) return -EOPNOTSUPP; - if ((wddev->max_timeout != 0) && - (timeout < wddev->min_timeout || timeout > wddev->max_timeout)) + if (watchdog_timeout_invalid(wddev, timeout)) return -EINVAL; mutex_lock(&wddev->lock); diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 3a9df2f43be6..2a3038ee17a3 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -118,6 +118,13 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway set_bit(WDOG_NO_WAY_OUT, &wdd->status); } +/* Use the following function to check if a timeout value is invalid */ +static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) +{ + return ((wdd->max_timeout != 0) && + (t < wdd->min_timeout || t > wdd->max_timeout)); +} + /* Use the following functions to manipulate watchdog driver specific data */ static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) { @@ -130,6 +137,8 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) } /* drivers/watchdog/watchdog_core.c */ +extern int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev); extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); -- cgit v1.2.3 From 8eae508b7c6ff502a71d0293b69e97c5505d5840 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 1 Mar 2013 11:11:47 -0800 Subject: hsi: fix kernel-doc warnings Fix kernel-doc warnings in hsi files: Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'e_handler' description in 'hsi_client' Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'pclaimed' description in 'hsi_client' Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'nb' description in 'hsi_client' Warning(drivers/hsi/hsi.c:434): No description found for parameter 'handler' Warning(drivers/hsi/hsi.c:434): Excess function parameter 'cb' description in 'hsi_register_port_event' Don't document "private:" fields with kernel-doc notation. If you want to leave them fully documented, that's OK, but then don't mark them as "private:". Signed-off-by: Randy Dunlap Cc: Carlos Chinea Cc: Linus Walleij Cc: Greg Kroah-Hartman Cc: linux-kernel@vger.kernel.org Cc: linux-omap@vger.kernel.org Acked-by: Nishanth Menon Signed-off-by: Linus Torvalds --- drivers/hsi/hsi.c | 2 +- include/linux/hsi/hsi.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c index 2d58f939d27f..833dd1afbf46 100644 --- a/drivers/hsi/hsi.c +++ b/drivers/hsi/hsi.c @@ -420,7 +420,7 @@ static int hsi_event_notifier_call(struct notifier_block *nb, /** * hsi_register_port_event - Register a client to receive port events * @cl: HSI client that wants to receive port events - * @cb: Event handler callback + * @handler: Event handler callback * * Clients should register a callback to be able to receive * events from the ports. Registration should happen after diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h index 56fae865e272..0dca785288cf 100644 --- a/include/linux/hsi/hsi.h +++ b/include/linux/hsi/hsi.h @@ -121,9 +121,9 @@ static inline int hsi_register_board_info(struct hsi_board_info const *info, * @device: Driver model representation of the device * @tx_cfg: HSI TX configuration * @rx_cfg: HSI RX configuration - * @e_handler: Callback for handling port events (RX Wake High/Low) - * @pclaimed: Keeps tracks if the clients claimed its associated HSI port - * @nb: Notifier block for port events + * e_handler: Callback for handling port events (RX Wake High/Low) + * pclaimed: Keeps tracks if the clients claimed its associated HSI port + * nb: Notifier block for port events */ struct hsi_client { struct device device; -- cgit v1.2.3 From fd7c092e711ebab55b2688d3859d95dfd0301f73 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:44 +0000 Subject: dm: fix truncated status strings Avoid returning a truncated table or status string instead of setting the DM_BUFFER_FULL_FLAG when the last target of a table fills the buffer. When processing a table or status request, the function retrieve_status calls ti->type->status. If ti->type->status returns non-zero, retrieve_status assumes that the buffer overflowed and sets DM_BUFFER_FULL_FLAG. However, targets don't return non-zero values from their status method on overflow. Most targets returns always zero. If a buffer overflow happens in a target that is not the last in the table, it gets noticed during the next iteration of the loop in retrieve_status; but if a buffer overflow happens in the last target, it goes unnoticed and erroneously truncated data is returned. In the current code, the targets behave in the following way: * dm-crypt returns -ENOMEM if there is not enough space to store the key, but it returns 0 on all other overflows. * dm-thin returns errors from the status method if a disk error happened. This is incorrect because retrieve_status doesn't check the error code, it assumes that all non-zero values mean buffer overflow. * all the other targets always return 0. This patch changes the ti->type->status function to return void (because most targets don't use the return code). Overflow is detected in retrieve_status: if the status method fills up the remaining space completely, it is assumed that buffer overflow happened. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 39 +++++---------------- drivers/md/dm-delay.c | 8 ++--- drivers/md/dm-flakey.c | 7 ++-- drivers/md/dm-ioctl.c | 14 +++++--- drivers/md/dm-linear.c | 7 ++-- drivers/md/dm-mpath.c | 8 ++--- drivers/md/dm-raid.c | 8 ++--- drivers/md/dm-raid1.c | 8 ++--- drivers/md/dm-snap.c | 16 ++++----- drivers/md/dm-stripe.c | 7 ++-- drivers/md/dm-thin.c | 80 ++++++++++++++++++++++++++----------------- drivers/md/dm-verity.c | 8 ++--- include/linux/device-mapper.h | 4 +-- 13 files changed, 99 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f7369f9d8595..2ae151e59c0c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1234,20 +1234,6 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) return 0; } -/* - * Encode key into its hex representation - */ -static void crypt_encode_key(char *hex, u8 *key, unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - sprintf(hex, "%02x", *key); - hex += 2; - key++; - } -} - static void crypt_free_tfms(struct crypt_config *cc) { unsigned i; @@ -1717,11 +1703,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int crypt_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void crypt_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; - unsigned int sz = 0; + unsigned i, sz = 0; switch (type) { case STATUSTYPE_INFO: @@ -1731,17 +1717,11 @@ static int crypt_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s ", cc->cipher_string); - if (cc->key_size > 0) { - if ((maxlen - sz) < ((cc->key_size << 1) + 1)) - return -ENOMEM; - - crypt_encode_key(result + sz, cc->key, cc->key_size); - sz += cc->key_size << 1; - } else { - if (sz >= maxlen) - return -ENOMEM; - result[sz++] = '-'; - } + if (cc->key_size > 0) + for (i = 0; i < cc->key_size; i++) + DMEMIT("%02x", cc->key[i]); + else + DMEMIT("-"); DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); @@ -1751,7 +1731,6 @@ static int crypt_status(struct dm_target *ti, status_type_t type, break; } - return 0; } static void crypt_postsuspend(struct dm_target *ti) @@ -1845,7 +1824,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 12, 0}, + .version = {1, 12, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index cc1bd048acb2..c0d03b006e40 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -293,8 +293,8 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, dc->read_delay, bio); } -static int delay_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void delay_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; @@ -314,8 +314,6 @@ static int delay_status(struct dm_target *ti, status_type_t type, dc->write_delay); break; } - - return 0; } static int delay_iterate_devices(struct dm_target *ti, @@ -337,7 +335,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 9721f2ffb1a2..5d6c04cceee0 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -337,8 +337,8 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) return error; } -static int flakey_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void flakey_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; @@ -368,7 +368,6 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; } - return 0; } static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) @@ -411,7 +410,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 3, 0}, + .version = {1, 3, 1}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0666b5d14b88..eee353da3742 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1067,6 +1067,7 @@ static void retrieve_status(struct dm_table *table, num_targets = dm_table_get_num_targets(table); for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); + size_t l; remaining = len - (outptr - outbuf); if (remaining <= sizeof(struct dm_target_spec)) { @@ -1093,14 +1094,17 @@ static void retrieve_status(struct dm_table *table, if (ti->type->status) { if (param->flags & DM_NOFLUSH_FLAG) status_flags |= DM_STATUS_NOFLUSH_FLAG; - if (ti->type->status(ti, type, status_flags, outptr, remaining)) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } + ti->type->status(ti, type, status_flags, outptr, remaining); } else outptr[0] = '\0'; - outptr += strlen(outptr) + 1; + l = strlen(outptr) + 1; + if (l == remaining) { + param->flags |= DM_BUFFER_FULL_FLAG; + break; + } + + outptr += l; used = param->data_start + (outptr - outbuf); outptr = align_ptr(outptr); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 328cad5617ab..5be301c1e809 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -95,8 +95,8 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -static int linear_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void linear_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -110,7 +110,6 @@ static int linear_status(struct dm_target *ti, status_type_t type, (unsigned long long)lc->start); break; } - return 0; } static int linear_ioctl(struct dm_target *ti, unsigned int cmd, @@ -155,7 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 573bd04591bf..d267bb5705e9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1378,8 +1378,8 @@ static void multipath_resume(struct dm_target *ti) * [priority selector-name num_ps_args [ps_args]* * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ -static int multipath_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void multipath_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1485,8 +1485,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type, } spin_unlock_irqrestore(&m->lock, flags); - - return 0; } static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) @@ -1695,7 +1693,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9e58dbd8d8cb..5a578d89da2d 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1201,8 +1201,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int raid_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void raid_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -1344,8 +1344,6 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" -"); } } - - return 0; } static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -1405,7 +1403,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 4, 1}, + .version = {1, 4, 2}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fa519185ebba..7f2419099b50 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1347,8 +1347,8 @@ static char device_status_char(struct mirror *m) } -static int mirror_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void mirror_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; @@ -1383,8 +1383,6 @@ static int mirror_status(struct dm_target *ti, status_type_t type, if (ms->features & DM_RAID1_HANDLE_ERRORS) DMEMIT(" 1 handle_errors"); } - - return 0; } static int mirror_iterate_devices(struct dm_target *ti, @@ -1403,7 +1401,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 13, 1}, + .version = {1, 13, 2}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 10079e07edf4..6e45e3774eab 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1836,8 +1836,8 @@ static void snapshot_merge_resume(struct dm_target *ti) start_merge(s); } -static int snapshot_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void snapshot_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -1883,8 +1883,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, maxlen - sz); break; } - - return 0; } static int snapshot_iterate_devices(struct dm_target *ti, @@ -2138,8 +2136,8 @@ static void origin_resume(struct dm_target *ti) ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2152,8 +2150,6 @@ static int origin_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, "%s", dev->name); break; } - - return 0; } static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, @@ -2180,7 +2176,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 8, 0}, + .version = {1, 8, 1}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2193,7 +2189,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 11, 0}, + .version = {1, 11, 1}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index c89cde86d400..aaecefa63935 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -312,8 +312,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) * */ -static int stripe_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -340,7 +340,6 @@ static int stripe_status(struct dm_target *ti, status_type_t type, (unsigned long long)sc->stripe[i].physical_start); break; } - return 0; } static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) @@ -428,7 +427,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 5409607d4875..7a66d73148e6 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2299,8 +2299,8 @@ static void emit_flags(struct pool_features *pf, char *result, * / * / */ -static int pool_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void pool_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; unsigned sz = 0; @@ -2326,32 +2326,41 @@ static int pool_status(struct dm_target *ti, status_type_t type, if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) (void) commit_or_fallback(pool); - r = dm_pool_get_metadata_transaction_id(pool->pmd, - &transaction_id); - if (r) - return r; + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); + if (r) { + DMERR("dm_pool_get_metadata_transaction_id returned %d", r); + goto err; + } - r = dm_pool_get_free_metadata_block_count(pool->pmd, - &nr_free_blocks_metadata); - if (r) - return r; + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); + if (r) { + DMERR("dm_pool_get_free_metadata_block_count returned %d", r); + goto err; + } r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_dev_size returned %d", r); + goto err; + } - r = dm_pool_get_free_block_count(pool->pmd, - &nr_free_blocks_data); - if (r) - return r; + r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data); + if (r) { + DMERR("dm_pool_get_free_block_count returned %d", r); + goto err; + } r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_data_dev_size returned %d", r); + goto err; + } r = dm_pool_get_metadata_snap(pool->pmd, &held_root); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_snap returned %d", r); + goto err; + } DMEMIT("%llu %llu/%llu %llu/%llu ", (unsigned long long)transaction_id, @@ -2388,8 +2397,10 @@ static int pool_status(struct dm_target *ti, status_type_t type, emit_flags(&pt->requested_pf, result, sz, maxlen); break; } + return; - return 0; +err: + DMEMIT("Error"); } static int pool_iterate_devices(struct dm_target *ti, @@ -2468,7 +2479,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 6, 0}, + .version = {1, 6, 1}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2676,8 +2687,8 @@ static void thin_postsuspend(struct dm_target *ti) /* * */ -static int thin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void thin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2687,7 +2698,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, if (get_pool_mode(tc->pool) == PM_FAIL) { DMEMIT("Fail"); - return 0; + return; } if (!tc->td) @@ -2696,12 +2707,16 @@ static int thin_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: r = dm_thin_get_mapped_count(tc->td, &mapped); - if (r) - return r; + if (r) { + DMERR("dm_thin_get_mapped_count returned %d", r); + goto err; + } r = dm_thin_get_highest_mapped_block(tc->td, &highest); - if (r < 0) - return r; + if (r < 0) { + DMERR("dm_thin_get_highest_mapped_block returned %d", r); + goto err; + } DMEMIT("%llu ", mapped * tc->pool->sectors_per_block); if (r) @@ -2721,7 +2736,10 @@ static int thin_status(struct dm_target *ti, status_type_t type, } } - return 0; + return; + +err: + DMEMIT("Error"); } static int thin_iterate_devices(struct dm_target *ti, @@ -2748,7 +2766,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 7, 0}, + .version = {1, 7, 1}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 52cde982164a..6ad538375c3c 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -508,8 +508,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) /* * Status: V (valid) or C (corruption found) */ -static int verity_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; @@ -540,8 +540,6 @@ static int verity_status(struct dm_target *ti, status_type_t type, DMEMIT("%02x", v->salt[x]); break; } - - return 0; } static int verity_ioctl(struct dm_target *ti, unsigned cmd, @@ -860,7 +858,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bf6afa2fc432..a5cda3ea6b88 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -68,8 +68,8 @@ typedef void (*dm_postsuspend_fn) (struct dm_target *ti); typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); -typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - unsigned status_flags, char *result, unsigned maxlen); +typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -- cgit v1.2.3 From 55a62eef8d1b50ceff3b7bf46851103bdcc7e5b0 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 1 Mar 2013 22:45:47 +0000 Subject: dm: rename request variables to bios Use 'bio' in the name of variables and functions that deal with bios rather than 'request' to avoid confusion with the normal block layer use of 'request'. No functional changes. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 6 +++--- drivers/md/dm-delay.c | 4 ++-- drivers/md/dm-flakey.c | 4 ++-- drivers/md/dm-linear.c | 6 +++--- drivers/md/dm-mpath.c | 4 ++-- drivers/md/dm-raid.c | 2 +- drivers/md/dm-raid1.c | 4 ++-- drivers/md/dm-snap.c | 10 +++++----- drivers/md/dm-stripe.c | 20 +++++++++---------- drivers/md/dm-table.c | 10 +++++----- drivers/md/dm-target.c | 2 +- drivers/md/dm-thin.c | 12 +++++------ drivers/md/dm-zero.c | 2 +- drivers/md/dm.c | 46 +++++++++++++++++++++---------------------- include/linux/device-mapper.h | 30 ++++++++++++++-------------- 15 files changed, 81 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 2ae151e59c0c..13c15480d940 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1637,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (opt_params == 1 && opt_string && !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; else if (opt_params) { ret = -EINVAL; ti->error = "Invalid feature arguments"; @@ -1665,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; return 0; @@ -1726,7 +1726,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_requests) + if (ti->num_discard_bios) DMEMIT(" 1 allow_discards"); break; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index c0d03b006e40..496d5f3646a5 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -198,8 +198,8 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->private = dc; return 0; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 5d6c04cceee0..7fcf21cb4ff8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 5be301c1e809..4f99d267340c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->num_write_same_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; + ti->num_write_same_bios = 1; ti->private = lc; return 0; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d267bb5705e9..51bb81676be3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; return 0; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5a578d89da2d..9a01d1e4c783 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 7f2419099b50..e2ea97723e10 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1072,8 +1072,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto err_free_context; - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6e45e3774eab..5d78027e07ac 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1037,7 +1037,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; - unsigned args_used, num_flush_requests = 1; + unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; if (argc != 4) { @@ -1047,7 +1047,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (dm_target_is_snapshot_merge(ti)) { - num_flush_requests = 2; + num_flush_bios = 2; origin_mode = FMODE_WRITE; } @@ -1127,7 +1127,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); ti->private = s; - ti->num_flush_requests = num_flush_requests; + ti->num_flush_bios = num_flush_bios; ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_rw & REQ_FLUSH) { - if (!dm_bio_get_target_request_nr(bio)) + if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; @@ -2102,7 +2102,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dev; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; return 0; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index aaecefa63935..d8837d313f54 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) return r; - ti->num_flush_requests = stripes; - ti->num_discard_requests = stripes; - ti->num_write_same_requests = stripes; + ti->num_flush_bios = stripes; + ti->num_discard_bios = stripes; + ti->num_write_same_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; - unsigned target_request_nr; + unsigned target_bio_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; return DM_MAPIO_REMAPPED; } if (unlikely(bio->bi_rw & REQ_DISCARD) || unlikely(bio->bi_rw & REQ_WRITE_SAME)) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_range(sc, bio, target_request_nr); + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + return stripe_map_range(sc, bio, target_bio_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c95405d04726..e50dad0c65f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -822,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + if (!tgt->num_discard_bios && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); return 0; @@ -1359,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_flush_requests) + if (!ti->num_flush_bios) continue; if (ti->flush_supported) @@ -1438,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_write_same_requests) + if (!ti->num_write_same_bios) return false; if (!ti->type->iterate_devices || @@ -1656,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) + if (!ti->num_discard_bios) continue; if (ti->discards_supported) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 617d21a77256..37ba5db71cd9 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) /* * Return error for discards instead of -EOPNOTSUPP */ - tt->num_discard_requests = 1; + tt->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 088f6b34f599..303e11da7c2a 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1944,7 +1944,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; /* * Only need to enable discards if the pool should pass @@ -1952,7 +1952,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * processing will cause mappings to be removed from the btree. */ if (pf.discard_enabled && pf.discard_passdown) { - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; /* * Setting 'discards_supported' circumvents the normal @@ -2593,17 +2593,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (r) goto bad_thin_open; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->flush_supported = true; ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; ti->discard_zeroes_data_unsupported = true; - /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = true; + /* Discard bios must be split on a block boundary */ + ti->split_discard_bios = true; } dm_put(pool_md); diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 69a5c3b3b340..c99003e0d47a 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Silently drop discards, avoiding -EOPNOTSUPP. */ - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4521d72637c2..caef71befc43 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1103,7 +1103,7 @@ static void clone_bio(struct dm_target_io *tio, struct bio *bio, static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti, int nr_iovecs, - unsigned target_request_nr) + unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; @@ -1114,15 +1114,15 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); - tio->target_request_nr = target_request_nr; + tio->target_bio_nr = target_bio_nr; return tio; } static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) + unsigned target_bio_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, request_nr); + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; /* @@ -1137,13 +1137,13 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, __map_bio(tio); } -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) +static void __issue_target_bios(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios, sector_t len) { - unsigned request_nr; + unsigned target_bio_nr; - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); + for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) + __issue_target_request(ci, ti, target_bio_nr, len); } static int __clone_and_map_empty_flush(struct clone_info *ci) @@ -1153,7 +1153,7 @@ static int __clone_and_map_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); + __issue_target_bios(ci, ti, ti->num_flush_bios, 0); return 0; } @@ -1173,32 +1173,32 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); +typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_requests(struct dm_target *ti) +static unsigned get_num_discard_bios(struct dm_target *ti) { - return ti->num_discard_requests; + return ti->num_discard_bios; } -static unsigned get_num_write_same_requests(struct dm_target *ti) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - return ti->num_write_same_requests; + return ti->num_write_same_bios; } typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) { - return ti->split_discard_requests; + return ti->split_discard_bios; } static int __clone_and_map_changing_extent_only(struct clone_info *ci, - get_num_requests_fn get_num_requests, + get_num_bios_fn get_num_bios, is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; - unsigned num_requests; + unsigned num_bios; do { ti = dm_table_find_target(ci->map, ci->sector); @@ -1211,8 +1211,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, * reconfiguration might also have changed that since the * check was performed. */ - num_requests = get_num_requests ? get_num_requests(ti) : 0; - if (!num_requests) + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) @@ -1220,7 +1220,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, else len = min(ci->sector_count, max_io_len(ci->sector, ti)); - __issue_target_requests(ci, ti, num_requests, len); + __issue_target_bios(ci, ti, num_bios, len); ci->sector += len; } while (ci->sector_count -= len); @@ -1230,13 +1230,13 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, static int __clone_and_map_discard(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + return __clone_and_map_changing_extent_only(ci, get_num_discard_bios, is_split_required_for_discard); } static int __clone_and_map_write_same(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); + return __clone_and_map_changing_extent_only(ci, get_num_write_same_bios, NULL); } static int __clone_and_map(struct clone_info *ci) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a5cda3ea6b88..d5f984b07466 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -187,26 +187,26 @@ struct dm_target { uint32_t max_io_len; /* - * A number of zero-length barrier requests that will be submitted + * A number of zero-length barrier bios that will be submitted * to the target for the purpose of flushing cache. * - * The request number can be accessed with dm_bio_get_target_request_nr. - * It is a responsibility of the target driver to remap these requests + * The bio number can be accessed with dm_bio_get_target_bio_nr. + * It is a responsibility of the target driver to remap these bios * to the real underlying devices. */ - unsigned num_flush_requests; + unsigned num_flush_bios; /* - * The number of discard requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of discard bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_discard_requests; + unsigned num_discard_bios; /* - * The number of WRITE SAME requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of WRITE SAME bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_write_same_requests; + unsigned num_write_same_bios; /* * The minimum number of extra bytes allocated in each bio for the @@ -233,10 +233,10 @@ struct dm_target { bool discards_supported:1; /* - * Set if the target required discard request to be split + * Set if the target required discard bios to be split * on max_io_len boundary. */ - bool split_discard_requests:1; + bool split_discard_bios:1; /* * Set if this target does not return zeroes on discarded blocks. @@ -261,7 +261,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; - unsigned target_request_nr; + unsigned target_bio_nr; struct bio clone; }; @@ -275,9 +275,9 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); } -static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +static inline unsigned dm_bio_get_target_bio_nr(const struct bio *bio) { - return container_of(bio, struct dm_target_io, clone)->target_request_nr; + return container_of(bio, struct dm_target_io, clone)->target_bio_nr; } int dm_register_target(struct target_type *t); -- cgit v1.2.3 From df5d2e9089c7d5b8c46f767e4278610ea3e815b9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm kcopyd: introduce configurable throttling This patch allows the administrator to reduce the rate at which kcopyd issues I/O. Each module that uses kcopyd acquires a throttle parameter that can be set in /sys/module/*/parameters. We maintain a history of kcopyd usage by each module in the variables io_period and total_period in struct dm_kcopyd_throttle. The actual kcopyd activity is calculated as a percentage of time equal to "(100 * io_period / total_period)". This is compared with the user-defined throttle percentage threshold and if it is exceeded, we sleep. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 121 +++++++++++++++++++++++++++++++++++++++++++++- drivers/md/dm-raid1.c | 5 +- drivers/md/dm-snap.c | 5 +- drivers/md/dm-thin.c | 5 +- include/linux/dm-kcopyd.h | 25 +++++++++- 5 files changed, 156 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 68c02673263b..d581fe5d2faf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ struct dm_kcopyd_client { struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; + struct dm_kcopyd_throttle *throttle; + /* * We maintain three lists of jobs: * @@ -68,6 +71,117 @@ struct dm_kcopyd_client { static struct page_list zero_page_list; +static DEFINE_SPINLOCK(throttle_spinlock); + +/* + * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. + * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided + * by 2. + */ +#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ + +/* + * Sleep this number of milliseconds. + * + * The value was decided experimentally. + * Smaller values seem to cause an increased copy rate above the limit. + * The reason for this is unknown but possibly due to jiffies rounding errors + * or read/write cache inside the disk. + */ +#define SLEEP_MSEC 100 + +/* + * Maximum number of sleep events. There is a theoretical livelock if more + * kcopyd clients do work simultaneously which this limit avoids. + */ +#define MAX_SLEEPS 10 + +static void io_job_start(struct dm_kcopyd_throttle *t) +{ + unsigned throttle, now, difference; + int slept = 0, skew; + + if (unlikely(!t)) + return; + +try_again: + spin_lock_irq(&throttle_spinlock); + + throttle = ACCESS_ONCE(t->throttle); + + if (likely(throttle >= 100)) + goto skip_limit; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + if (t->num_io_jobs) + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + + if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { + int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); + t->total_period >>= shift; + t->io_period >>= shift; + } + + skew = t->io_period - throttle * t->total_period / 100; + + if (unlikely(skew > 0) && slept < MAX_SLEEPS) { + slept++; + spin_unlock_irq(&throttle_spinlock); + msleep(SLEEP_MSEC); + goto try_again; + } + +skip_limit: + t->num_io_jobs++; + + spin_unlock_irq(&throttle_spinlock); +} + +static void io_job_finish(struct dm_kcopyd_throttle *t) +{ + unsigned long flags; + + if (unlikely(!t)) + return; + + spin_lock_irqsave(&throttle_spinlock, flags); + + t->num_io_jobs--; + + if (likely(ACCESS_ONCE(t->throttle) >= 100)) + goto skip_limit; + + if (!t->num_io_jobs) { + unsigned now, difference; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + } + +skip_limit: + spin_unlock_irqrestore(&throttle_spinlock, flags); +} + + static void wake(struct dm_kcopyd_client *kc) { queue_work(kc->kcopyd_wq, &kc->kcopyd_work); @@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context) struct kcopyd_job *job = (struct kcopyd_job *) context; struct dm_kcopyd_client *kc = job->kc; + io_job_finish(kc->throttle); + if (error) { if (job->rw & WRITE) job->write_err |= error; @@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + io_job_start(job->kc->throttle); + if (job->rw == READ) r = dm_io(&io_req, 1, &job->source, NULL); else @@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void) INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->throttle = throttle; kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!kc->job_pool) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index e2ea97723e10..d053098c6a91 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -82,6 +82,9 @@ struct mirror_set { struct mirror mirror[0]; }; +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle, + "A percentage of time allocated for raid resynchronization"); + static void wakeup_mirrord(void *context) { struct mirror_set *ms = context; @@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - ms->kcopyd_client = dm_kcopyd_client_create(); + ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(ms->kcopyd_client)) { r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 58c2b5881377..c0e07026a8d1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -124,6 +124,9 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + struct dm_dev *dm_snap_origin(struct dm_snapshot *s) { return s->origin; @@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - s->kcopyd_client = dm_kcopyd_client_create(); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 303e11da7c2a..35d9d0396cc2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -26,6 +26,9 @@ #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + /* * The block size of the device holding pool data must be * between 64KB and 1GB. @@ -1642,7 +1645,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_prison; } - pool->copier = dm_kcopyd_client_create(); + pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(pool->copier)) { r = PTR_ERR(pool->copier); *error = "Error creating pool's kcopyd client"; diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 47d9d376e4e7..f486d636b82e 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -21,11 +21,34 @@ #define DM_KCOPYD_IGNORE_ERROR 1 +struct dm_kcopyd_throttle { + unsigned throttle; + unsigned num_io_jobs; + unsigned io_period; + unsigned total_period; + unsigned last_jiffies; +}; + +/* + * kcopyd clients that want to support throttling must pass an initialised + * dm_kcopyd_throttle struct into dm_kcopyd_client_create(). + * Two or more clients may share the same instance of this struct between + * them if they wish to be throttled as a group. + * + * This macro also creates a corresponding module parameter to configure + * the amount of throttling. + */ +#define DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(name, description) \ +static struct dm_kcopyd_throttle dm_kcopyd_throttle = { 100, 0, 0, 0, 0 }; \ +module_param_named(name, dm_kcopyd_throttle.throttle, uint, 0644); \ +MODULE_PARM_DESC(name, description) + /* * To use kcopyd you must first create a dm_kcopyd_client object. + * throttle can be NULL if you don't want any throttling. */ struct dm_kcopyd_client; -struct dm_kcopyd_client *dm_kcopyd_client_create(void); +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From b0d8ed4d96a26ef3ac54a4aa8911c9413070662e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm: add target num_write_bios fn Add a num_write_bios function to struct target. If an instance of a target sets this, it will be queried before the target's mapping function is called on a write bio, and the response controls the number of copies of the write bio that the target will receive. This provides a convenient way for a target to send the same data to more than one device. The new cache target uses this in writethrough mode, to send the data both to the cache and the backing device. Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 22 +++++++++++++++------- include/linux/device-mapper.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e417cf0a69ef..7e469260fe5e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1152,15 +1152,23 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti { struct bio *bio = ci->bio; struct dm_target_io *tio; + unsigned target_bio_nr; + unsigned num_target_bios = 1; - tio = alloc_tio(ci, ti, nr_iovecs, 0); - - if (split_bvec) - clone_split_bio(tio, bio, sector, idx, offset, len); - else - clone_bio(tio, bio, sector, idx, bv_count, len); + /* + * Does the target want to receive duplicate copies of the bio? + */ + if (bio_data_dir(bio) == WRITE && ti->num_write_bios) + num_target_bios = ti->num_write_bios(ti, bio); - __map_bio(tio); + for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { + tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); + if (split_bvec) + clone_split_bio(tio, bio, sector, idx, offset, len); + else + clone_bio(tio, bio, sector, idx, bv_count, len); + __map_bio(tio); + } } typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d5f984b07466..1e483fa7afb4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -175,6 +175,14 @@ struct target_type { #define DM_TARGET_IMMUTABLE 0x00000004 #define dm_target_is_immutable(type) ((type)->features & DM_TARGET_IMMUTABLE) +/* + * Some targets need to be sent the same WRITE bio severals times so + * that they can send copies of it to different devices. This function + * examines any supplied bio and returns the number of copies of it the + * target requires. + */ +typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio); + struct dm_target { struct dm_table *table; struct target_type *type; @@ -214,6 +222,13 @@ struct dm_target { */ unsigned per_bio_data_size; + /* + * If defined, this function is called to find out how many + * duplicate bios should be sent to the target when writing + * data. + */ + dm_num_write_bios_fn num_write_bios; + /* target specific data */ void *private; -- cgit v1.2.3 From dd37978c50bc8b354e5c4633f69387f16572fdac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Mar 2013 19:48:30 -0500 Subject: cache the value of file_inode() in struct file Note that this thing does *not* contribute to inode refcount; it's pinned down by dentry. Signed-off-by: Al Viro --- fs/file_table.c | 2 ++ fs/open.c | 3 ++- include/linux/fs.h | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index aa07d3684a2e..cd4d87a82951 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -176,6 +176,7 @@ struct file *alloc_file(struct path *path, fmode_t mode, return file; file->f_path = *path; + file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; @@ -258,6 +259,7 @@ static void __fput(struct file *file) drop_file_write_access(file); file->f_path.dentry = NULL; file->f_path.mnt = NULL; + file->f_inode = NULL; file_free(file); dput(dentry); mntput(mnt); diff --git a/fs/open.c b/fs/open.c index 62f907e3bc36..806d4589559f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f, f->f_mode = FMODE_PATH; path_get(&f->f_path); - inode = file_inode(f); + inode = f->f_inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, f->f_path.mnt); if (error) @@ -752,6 +752,7 @@ cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; f->f_path.dentry = NULL; + f->f_inode = NULL; return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e686a099465..74a907b8b950 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,6 +769,7 @@ struct file { } f_u; struct path f_path; #define f_dentry f_path.dentry + struct inode *f_inode; /* cached value */ const struct file_operations *f_op; /* @@ -2217,7 +2218,7 @@ static inline bool execute_ok(struct inode *inode) static inline struct inode *file_inode(struct file *f) { - return f->f_path.dentry->d_inode; + return f->f_inode; } /* -- cgit v1.2.3 From dcf787f39162ce32ca325b3e784aba2d2444619a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Mar 2013 23:51:07 -0500 Subject: constify path_get/path_put and fs_struct.c stuff Signed-off-by: Al Viro --- fs/fs_struct.c | 6 +++--- fs/internal.h | 2 +- fs/namei.c | 4 ++-- include/linux/fs_struct.h | 4 ++-- include/linux/path.h | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_struct.c b/fs/fs_struct.c index fe6ca583bbc0..d8ac61d0c932 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -10,7 +10,7 @@ * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. */ -void set_fs_root(struct fs_struct *fs, struct path *path) +void set_fs_root(struct fs_struct *fs, const struct path *path) { struct path old_root; @@ -29,7 +29,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path) * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. */ -void set_fs_pwd(struct fs_struct *fs, struct path *path) +void set_fs_pwd(struct fs_struct *fs, const struct path *path) { struct path old_pwd; @@ -53,7 +53,7 @@ static inline int replace_path(struct path *p, const struct path *old, const str return 1; } -void chroot_fs_refs(struct path *old_root, struct path *new_root) +void chroot_fs_refs(const struct path *old_root, const struct path *new_root) { struct task_struct *g, *p; struct fs_struct *fs; diff --git a/fs/internal.h b/fs/internal.h index 2f6af7f645eb..507141fceb99 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -69,7 +69,7 @@ extern void __mnt_drop_write_file(struct file *); /* * fs_struct.c */ -extern void chroot_fs_refs(struct path *, struct path *); +extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c diff --git a/fs/namei.c b/fs/namei.c index dc984fee5532..961bc1268366 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -451,7 +451,7 @@ int inode_permission(struct inode *inode, int mask) * * Given a path increment the reference count to the dentry and the vfsmount. */ -void path_get(struct path *path) +void path_get(const struct path *path) { mntget(path->mnt); dget(path->dentry); @@ -464,7 +464,7 @@ EXPORT_SYMBOL(path_get); * * Given a path decrement the reference count to the dentry and the vfsmount. */ -void path_put(struct path *path) +void path_put(const struct path *path) { dput(path->dentry); mntput(path->mnt); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index d0ae3a84bcfb..729eded4b24f 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -17,8 +17,8 @@ struct fs_struct { extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); -extern void set_fs_root(struct fs_struct *, struct path *); -extern void set_fs_pwd(struct fs_struct *, struct path *); +extern void set_fs_root(struct fs_struct *, const struct path *); +extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec6266..d1372186f431 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -9,8 +9,8 @@ struct path { struct dentry *dentry; }; -extern void path_get(struct path *); -extern void path_put(struct path *); +extern void path_get(const struct path *); +extern void path_put(const struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { -- cgit v1.2.3 From 20e6926dcbafa1b361f1c29d967688be14b6ca4b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 1 Mar 2013 14:51:27 -0800 Subject: x86, ACPI, mm: Revert movablemem_map support Tim found: WARNING: at arch/x86/kernel/smpboot.c:324 topology_sane.isra.2+0x6f/0x80() Hardware name: S2600CP sched: CPU #1's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. smpboot: Booting Node 1, Processors #1 Modules linked in: Pid: 0, comm: swapper/1 Not tainted 3.9.0-0-generic #1 Call Trace: set_cpu_sibling_map+0x279/0x449 start_secondary+0x11d/0x1e5 Don Morris reproduced on a HP z620 workstation, and bisected it to commit e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready") It turns out movable_map has some problems, and it breaks several things 1. numa_init is called several times, NOT just for srat. so those nodes_clear(numa_nodes_parsed) memset(&numa_meminfo, 0, sizeof(numa_meminfo)) can not be just removed. Need to consider sequence is: numaq, srat, amd, dummy. and make fall back path working. 2. simply split acpi_numa_init to early_parse_srat. a. that early_parse_srat is NOT called for ia64, so you break ia64. b. for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE) still left in numa_init. So it will just clear result from early_parse_srat. it should be moved before that.... c. it breaks ACPI_TABLE_OVERIDE...as the acpi table scan is moved early before override from INITRD is settled. 3. that patch TITLE is total misleading, there is NO x86 in the title, but it changes critical x86 code. It caused x86 guys did not pay attention to find the problem early. Those patches really should be routed via tip/x86/mm. 4. after that commit, following range can not use movable ram: a. real_mode code.... well..funny, legacy Node0 [0,1M) could be hot-removed? b. initrd... it will be freed after booting, so it could be on movable... c. crashkernel for kdump...: looks like we can not put kdump kernel above 4G anymore. d. init_mem_mapping: can not put page table high anymore. e. initmem_init: vmemmap can not be high local node anymore. That is not good. If node is hotplugable, the mem related range like page table and vmemmap could be on the that node without problem and should be on that node. We have workaround patch that could fix some problems, but some can not be fixed. So just remove that offending commit and related ones including: f7210e6c4ac7 ("mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect movablecore_map in memblock_overlaps_region().") 01a178a94e8e ("acpi, memory-hotplug: support getting hotplug info from SRAT") 27168d38fa20 ("acpi, memory-hotplug: extend movablemem_map ranges to the end of node") e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready") fb06bc8e5f42 ("page_alloc: bootmem limit with movablecore_map") 42f47e27e761 ("page_alloc: make movablemem_map have higher priority") 6981ec31146c ("page_alloc: introduce zone_movable_limit[] to keep movable limit for nodes") 34b71f1e04fc ("page_alloc: add movable_memmap kernel parameter") 4d59a75125d5 ("x86: get pg_data_t's memory from other node") Later we should have patches that will make sure kernel put page table and vmemmap on local node ram instead of push them down to node0. Also need to find way to put other kernel used ram to local node ram. Reported-by: Tim Gardner Reported-by: Don Morris Bisected-by: Don Morris Tested-by: Don Morris Signed-off-by: Yinghai Lu Cc: Tony Luck Cc: Thomas Renninger Cc: Tejun Heo Cc: Tang Chen Cc: Yasuaki Ishimatsu Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 36 ----- arch/x86/kernel/setup.c | 13 +- arch/x86/mm/numa.c | 11 +- arch/x86/mm/srat.c | 125 +--------------- drivers/acpi/numa.c | 23 ++- include/linux/acpi.h | 8 - include/linux/memblock.h | 2 - include/linux/mm.h | 18 --- mm/memblock.c | 50 ------- mm/page_alloc.c | 285 +----------------------------------- 10 files changed, 27 insertions(+), 544 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e567af39ee34..3a54fca730c0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1645,42 +1645,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that the amount of memory usable for all allocations is not too small. - movablemem_map=acpi - [KNL,X86,IA-64,PPC] This parameter is similar to - memmap except it specifies the memory map of - ZONE_MOVABLE. - This option inform the kernel to use Hot Pluggable bit - in flags from SRAT from ACPI BIOS to determine which - memory devices could be hotplugged. The corresponding - memory ranges will be set as ZONE_MOVABLE. - NOTE: Whatever node the kernel resides in will always - be un-hotpluggable. - - movablemem_map=nn[KMG]@ss[KMG] - [KNL,X86,IA-64,PPC] This parameter is similar to - memmap except it specifies the memory map of - ZONE_MOVABLE. - If user specifies memory ranges, the info in SRAT will - be ingored. And it works like the following: - - If more ranges are all within one node, then from - lowest ss to the end of the node will be ZONE_MOVABLE. - - If a range is within a node, then from ss to the end - of the node will be ZONE_MOVABLE. - - If a range covers two or more nodes, then from ss to - the end of the 1st node will be ZONE_MOVABLE, and all - the rest nodes will only have ZONE_MOVABLE. - If memmap is specified at the same time, the - movablemem_map will be limited within the memmap - areas. If kernelcore or movablecore is also specified, - movablemem_map will have higher priority to be - satisfied. So the administrator should be careful that - the amount of movablemem_map areas are not too large. - Otherwise kernel won't have enough memory to start. - NOTE: We don't stop users specifying the node the - kernel resides in as hotpluggable so that this - option can be used as a workaround of firmware - bugs. - MTD_Partition= [MTD] Format: ,,, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e89acdf6b77b..84d32855f65c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1056,15 +1056,6 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif - /* - * In the memory hotplug case, the kernel needs info from SRAT to - * determine which memory is hotpluggable before allocating memory - * using memblock. - */ - acpi_boot_table_init(); - early_acpi_boot_init(); - early_parse_srat(); - #ifdef CONFIG_X86_32 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped<cnt; i++) { - if (end <= rgn->regions[i].base || - start >= rgn->regions[i].base + - rgn->regions[i].size) - continue; - - /* - * If the memory range overlaps the memory reserved by - * memblock, then the kernel resides in this node. - */ - node_set(node, movablemem_map.numa_nodes_kernel); - - goto out; - } - - /* - * If the kernel resides in this node, then the whole node - * should not be hotpluggable. - */ - if (node_isset(node, movablemem_map.numa_nodes_kernel)) - goto out; - - insert_movablemem_map(start_pfn, end_pfn); - - /* - * numa_nodes_hotplug nodemask represents which nodes are put - * into movablemem_map.map[]. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - goto out; - } - - /* - * For movablemem_map=nn[KMG]@ss[KMG]: - * - * SRAT: |_____| |_____| |_________| |_________| ...... - * node id: 0 1 1 2 - * user specified: |__| |___| - * movablemem_map: |___| |_________| |______| ...... - * - * Using movablemem_map, we can prevent memblock from allocating memory - * on ZONE_MOVABLE at boot time. - * - * NOTE: In this case, SRAT info will be ingored. - */ - overlap = movablemem_map_overlap(start_pfn, end_pfn); - if (overlap >= 0) { - /* - * If part of this range is in movablemem_map, we need to - * add the range after it to extend the range to the end - * of the node, because from the min address specified to - * the end of the node will be ZONE_MOVABLE. - */ - start_pfn = max(start_pfn, - movablemem_map.map[overlap].start_pfn); - insert_movablemem_map(start_pfn, end_pfn); - - /* - * Set the nodemask, so that if the address range on one node - * is not continuse, we can add the subsequent ranges on the - * same node into movablemem_map. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - } else { - if (node_isset(node, movablemem_map.numa_nodes_hotplug)) - /* - * Insert the range if we already have movable ranges - * on the same node. - */ - insert_movablemem_map(start_pfn, end_pfn); - } -out: - return; -} -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void -handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) -{ -} -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; - u32 hotpluggable; int node, pxm; if (srat_disabled()) @@ -269,8 +154,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) goto out_err; - hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; - if (hotpluggable && !save_add_info()) + if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) goto out_err; start = ma->base_address; @@ -290,12 +174,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, - (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? "Hot Pluggable": ""); - - handle_movablemem(node, start, end, hotpluggable); + (unsigned long long) start, (unsigned long long) end - 1); return 0; out_err_bad_srat: diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 59844ee149be..33e609f63585 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } -static int srat_mem_cnt; - -void __init early_parse_srat(void) +int __init acpi_numa_init(void) { + int cnt = 0; + /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -295,24 +295,21 @@ void __init early_parse_srat(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, 0); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, 0); - srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_processor_affinity, 0); + cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + acpi_parse_memory_affinity, + NR_NODE_MEMBLKS); } -} -int __init acpi_numa_init(void) -{ /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - if (srat_mem_cnt < 0) - return srat_mem_cnt; + if (cnt < 0) + return cnt; else if (!parsed_numa_memblks) return -ENOENT; return 0; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f46cfd73a553..bcbdd7484e58 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -485,14 +485,6 @@ static inline bool acpi_driver_match_device(struct device *dev, #endif /* !CONFIG_ACPI */ -#ifdef CONFIG_ACPI_NUMA -void __init early_parse_srat(void); -#else -static inline void early_parse_srat(void) -{ -} -#endif - #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3e5ecb2d790e..f388203db7e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,7 +42,6 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -extern struct movablemem_map movablemem_map; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) @@ -61,7 +60,6 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); diff --git a/include/linux/mm.h b/include/linux/mm.h index e7c3f9a0111a..1ede55f292c2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1333,24 +1333,6 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); -#define MOVABLEMEM_MAP_MAX MAX_NUMNODES -struct movablemem_entry { - unsigned long start_pfn; /* start pfn of memory segment */ - unsigned long end_pfn; /* end pfn of memory segment (exclusive) */ -}; - -struct movablemem_map { - bool acpi; /* true if using SRAT info */ - int nr_map; - struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; - nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ - nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ -}; - -extern void __init insert_movablemem_map(unsigned long start_pfn, - unsigned long end_pfn); -extern int __init movablemem_map_overlap(unsigned long start_pfn, - unsigned long end_pfn); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ diff --git a/mm/memblock.c b/mm/memblock.c index 1bcd9b970564..b8d9147e5c08 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -92,58 +92,9 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type, * * Find @size free area aligned to @align in the specified range and node. * - * If we have CONFIG_HAVE_MEMBLOCK_NODE_MAP defined, we need to check if the - * memory we found if not in hotpluggable ranges. - * * RETURNS: * Found address on success, %0 on failure. */ -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, - phys_addr_t end, phys_addr_t size, - phys_addr_t align, int nid) -{ - phys_addr_t this_start, this_end, cand; - u64 i; - int curr = movablemem_map.nr_map - 1; - - /* pump up @end */ - if (end == MEMBLOCK_ALLOC_ACCESSIBLE) - end = memblock.current_limit; - - /* avoid allocating the first page */ - start = max_t(phys_addr_t, start, PAGE_SIZE); - end = max(start, end); - - for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { - this_start = clamp(this_start, start, end); - this_end = clamp(this_end, start, end); - -restart: - if (this_end <= this_start || this_end < size) - continue; - - for (; curr >= 0; curr--) { - if ((movablemem_map.map[curr].start_pfn << PAGE_SHIFT) - < this_end) - break; - } - - cand = round_down(this_end - size, align); - if (curr >= 0 && - cand < movablemem_map.map[curr].end_pfn << PAGE_SHIFT) { - this_end = movablemem_map.map[curr].start_pfn - << PAGE_SHIFT; - goto restart; - } - - if (cand >= this_start) - return cand; - } - - return 0; -} -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid) @@ -172,7 +123,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, } return 0; } -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ /** * memblock_find_in_range - find free area in given range diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0dade3f18f7d..8fcced7823fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -202,18 +202,11 @@ static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -/* Movable memory ranges, will also be used by memblock subsystem. */ -struct movablemem_map movablemem_map = { - .acpi = false, - .nr_map = 0, -}; - static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; static unsigned long __initdata required_kernelcore; static unsigned long __initdata required_movablecore; static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; -static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES]; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ int movable_zone; @@ -4412,77 +4405,6 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } -/** - * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array. - * - * zone_movable_limit is initialized as 0. This function will try to get - * the first ZONE_MOVABLE pfn of each node from movablemem_map, and - * assigne them to zone_movable_limit. - * zone_movable_limit[nid] == 0 means no limit for the node. - * - * Note: Each range is represented as [start_pfn, end_pfn) - */ -static void __meminit sanitize_zone_movable_limit(void) -{ - int map_pos = 0, i, nid; - unsigned long start_pfn, end_pfn; - - if (!movablemem_map.nr_map) - return; - - /* Iterate all ranges from minimum to maximum */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - /* - * If we have found lowest pfn of ZONE_MOVABLE of the node - * specified by user, just go on to check next range. - */ - if (zone_movable_limit[nid]) - continue; - -#ifdef CONFIG_ZONE_DMA - /* Skip DMA memory. */ - if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA]) - start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA]; -#endif - -#ifdef CONFIG_ZONE_DMA32 - /* Skip DMA32 memory. */ - if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32]) - start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32]; -#endif - -#ifdef CONFIG_HIGHMEM - /* Skip lowmem if ZONE_MOVABLE is highmem. */ - if (zone_movable_is_highmem() && - start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]) - start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]; -#endif - - if (start_pfn >= end_pfn) - continue; - - while (map_pos < movablemem_map.nr_map) { - if (end_pfn <= movablemem_map.map[map_pos].start_pfn) - break; - - if (start_pfn >= movablemem_map.map[map_pos].end_pfn) { - map_pos++; - continue; - } - - /* - * The start_pfn of ZONE_MOVABLE is either the minimum - * pfn specified by movablemem_map, or 0, which means - * the node has no ZONE_MOVABLE. - */ - zone_movable_limit[nid] = max(start_pfn, - movablemem_map.map[map_pos].start_pfn); - - break; - } - } -} - #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -4500,6 +4422,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } + #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, @@ -4941,19 +4864,12 @@ static void __init find_zone_movable_pfns_for_nodes(void) required_kernelcore = max(required_kernelcore, corepages); } - /* - * If neither kernelcore/movablecore nor movablemem_map is specified, - * there is no ZONE_MOVABLE. But if movablemem_map is specified, the - * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[]. - */ - if (!required_kernelcore) { - if (movablemem_map.nr_map) - memcpy(zone_movable_pfn, zone_movable_limit, - sizeof(zone_movable_pfn)); + /* If kernelcore was not specified, there is no ZONE_MOVABLE */ + if (!required_kernelcore) goto out; - } /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ + find_usable_zone_for_movable(); usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone]; restart: @@ -4981,24 +4897,10 @@ restart: for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { unsigned long size_pages; - /* - * Find more memory for kernelcore in - * [zone_movable_pfn[nid], zone_movable_limit[nid]). - */ start_pfn = max(start_pfn, zone_movable_pfn[nid]); if (start_pfn >= end_pfn) continue; - if (zone_movable_limit[nid]) { - end_pfn = min(end_pfn, zone_movable_limit[nid]); - /* No range left for kernelcore in this node */ - if (start_pfn >= end_pfn) { - zone_movable_pfn[nid] = - zone_movable_limit[nid]; - break; - } - } - /* Account for what is only usable for kernelcore */ if (start_pfn < usable_startpfn) { unsigned long kernel_pages; @@ -5058,12 +4960,12 @@ restart: if (usable_nodes && required_kernelcore > usable_nodes) goto restart; -out: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ for (nid = 0; nid < MAX_NUMNODES; nid++) zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); +out: /* restore the node_state */ node_states[N_MEMORY] = saved_node_state; } @@ -5126,8 +5028,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Find the PFNs that ZONE_MOVABLE begins at in each node */ memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); - find_usable_zone_for_movable(); - sanitize_zone_movable_limit(); find_zone_movable_pfns_for_nodes(); /* Print out the zone ranges */ @@ -5211,181 +5111,6 @@ static int __init cmdline_parse_movablecore(char *p) early_param("kernelcore", cmdline_parse_kernelcore); early_param("movablecore", cmdline_parse_movablecore); -/** - * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[]. - * @start_pfn: start pfn of the range to be checked - * @end_pfn: end pfn of the range to be checked (exclusive) - * - * This function checks if a given memory range [start_pfn, end_pfn) overlaps - * the movablemem_map.map[] array. - * - * Return: index of the first overlapped element in movablemem_map.map[] - * or -1 if they don't overlap each other. - */ -int __init movablemem_map_overlap(unsigned long start_pfn, - unsigned long end_pfn) -{ - int overlap; - - if (!movablemem_map.nr_map) - return -1; - - for (overlap = 0; overlap < movablemem_map.nr_map; overlap++) - if (start_pfn < movablemem_map.map[overlap].end_pfn) - break; - - if (overlap == movablemem_map.nr_map || - end_pfn <= movablemem_map.map[overlap].start_pfn) - return -1; - - return overlap; -} - -/** - * insert_movablemem_map - Insert a memory range in to movablemem_map.map. - * @start_pfn: start pfn of the range - * @end_pfn: end pfn of the range - * - * This function will also merge the overlapped ranges, and sort the array - * by start_pfn in monotonic increasing order. - */ -void __init insert_movablemem_map(unsigned long start_pfn, - unsigned long end_pfn) -{ - int pos, overlap; - - /* - * pos will be at the 1st overlapped range, or the position - * where the element should be inserted. - */ - for (pos = 0; pos < movablemem_map.nr_map; pos++) - if (start_pfn <= movablemem_map.map[pos].end_pfn) - break; - - /* If there is no overlapped range, just insert the element. */ - if (pos == movablemem_map.nr_map || - end_pfn < movablemem_map.map[pos].start_pfn) { - /* - * If pos is not the end of array, we need to move all - * the rest elements backward. - */ - if (pos < movablemem_map.nr_map) - memmove(&movablemem_map.map[pos+1], - &movablemem_map.map[pos], - sizeof(struct movablemem_entry) * - (movablemem_map.nr_map - pos)); - movablemem_map.map[pos].start_pfn = start_pfn; - movablemem_map.map[pos].end_pfn = end_pfn; - movablemem_map.nr_map++; - return; - } - - /* overlap will be at the last overlapped range */ - for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++) - if (end_pfn < movablemem_map.map[overlap].start_pfn) - break; - - /* - * If there are more ranges overlapped, we need to merge them, - * and move the rest elements forward. - */ - overlap--; - movablemem_map.map[pos].start_pfn = min(start_pfn, - movablemem_map.map[pos].start_pfn); - movablemem_map.map[pos].end_pfn = max(end_pfn, - movablemem_map.map[overlap].end_pfn); - - if (pos != overlap && overlap + 1 != movablemem_map.nr_map) - memmove(&movablemem_map.map[pos+1], - &movablemem_map.map[overlap+1], - sizeof(struct movablemem_entry) * - (movablemem_map.nr_map - overlap - 1)); - - movablemem_map.nr_map -= overlap - pos; -} - -/** - * movablemem_map_add_region - Add a memory range into movablemem_map. - * @start: physical start address of range - * @end: physical end address of range - * - * This function transform the physical address into pfn, and then add the - * range into movablemem_map by calling insert_movablemem_map(). - */ -static void __init movablemem_map_add_region(u64 start, u64 size) -{ - unsigned long start_pfn, end_pfn; - - /* In case size == 0 or start + size overflows */ - if (start + size <= start) - return; - - if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) { - pr_err("movablemem_map: too many entries;" - " ignoring [mem %#010llx-%#010llx]\n", - (unsigned long long) start, - (unsigned long long) (start + size - 1)); - return; - } - - start_pfn = PFN_DOWN(start); - end_pfn = PFN_UP(start + size); - insert_movablemem_map(start_pfn, end_pfn); -} - -/* - * cmdline_parse_movablemem_map - Parse boot option movablemem_map. - * @p: The boot option of the following format: - * movablemem_map=nn[KMG]@ss[KMG] - * - * This option sets the memory range [ss, ss+nn) to be used as movable memory. - * - * Return: 0 on success or -EINVAL on failure. - */ -static int __init cmdline_parse_movablemem_map(char *p) -{ - char *oldp; - u64 start_at, mem_size; - - if (!p) - goto err; - - if (!strcmp(p, "acpi")) - movablemem_map.acpi = true; - - /* - * If user decide to use info from BIOS, all the other user specified - * ranges will be ingored. - */ - if (movablemem_map.acpi) { - if (movablemem_map.nr_map) { - memset(movablemem_map.map, 0, - sizeof(struct movablemem_entry) - * movablemem_map.nr_map); - movablemem_map.nr_map = 0; - } - return 0; - } - - oldp = p; - mem_size = memparse(p, &p); - if (p == oldp) - goto err; - - if (*p == '@') { - oldp = ++p; - start_at = memparse(p, &p); - if (p == oldp || *p != '\0') - goto err; - - movablemem_map_add_region(start_at, mem_size); - return 0; - } -err: - return -EINVAL; -} -early_param("movablemem_map", cmdline_parse_movablemem_map); - #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ /** -- cgit v1.2.3 From 5698c50d9da4ab2f57d98c64ea97675dcaf2a608 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Oct 2012 10:54:47 +0100 Subject: metag: Internal and external irqchips Meta core internal interrupts (from HWSTATMETA and friends) are vectored onto the TR1 core trigger for the current thread. This is demultiplexed in irq-metag.c to individual Linux IRQs for each internal interrupt. External SoC interrupts (from HWSTATEXT and friends) are vectored onto the TR2 core trigger for the current thread. This is demultiplexed in irq-metag-ext.c to individual Linux IRQs for each external SoC interrupt. The external irqchip has devicetree bindings for configuring the number of irq banks and the type of masking available. Signed-off-by: James Hogan Cc: Arnd Bergmann Cc: Grant Likely Cc: Rob Herring Cc: Rob Landley Cc: Dom Cobley Cc: Simon Arlott Cc: Viresh Kumar Cc: Maxime Ripard Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-doc@vger.kernel.org --- .../devicetree/bindings/metag/meta-intc.txt | 82 ++ MAINTAINERS | 2 + arch/metag/kernel/irq.c | 5 + drivers/irqchip/Makefile | 2 + drivers/irqchip/irq-metag-ext.c | 868 +++++++++++++++++++++ drivers/irqchip/irq-metag.c | 343 ++++++++ include/linux/irqchip/metag-ext.h | 33 + include/linux/irqchip/metag.h | 24 + 8 files changed, 1359 insertions(+) create mode 100644 Documentation/devicetree/bindings/metag/meta-intc.txt create mode 100644 drivers/irqchip/irq-metag-ext.c create mode 100644 drivers/irqchip/irq-metag.c create mode 100644 include/linux/irqchip/metag-ext.h create mode 100644 include/linux/irqchip/metag.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt new file mode 100644 index 000000000000..8c47dcbfabc6 --- /dev/null +++ b/Documentation/devicetree/bindings/metag/meta-intc.txt @@ -0,0 +1,82 @@ +* Meta External Trigger Controller Binding + +This binding specifies what properties must be available in the device tree +representation of a Meta external trigger controller. + +Required properties: + + - compatible: Specifies the compatibility list for the interrupt controller. + The type shall be and the value shall include "img,meta-intc". + + - num-banks: Specifies the number of interrupt banks (each of which can + handle 32 interrupt sources). + + - interrupt-controller: The presence of this property identifies the node + as an interupt controller. No property value shall be defined. + + - #interrupt-cells: Specifies the number of cells needed to encode an + interrupt source. The type shall be a and the value shall be 2. + + - #address-cells: Specifies the number of cells needed to encode an + address. The type shall be and the value shall be 0. As such, + 'interrupt-map' nodes do not have to specify a parent unit address. + +Optional properties: + + - no-mask: The controller doesn't have any mask registers. + +* Interrupt Specifier Definition + + Interrupt specifiers consists of 2 cells encoded as follows: + + - <1st-cell>: The interrupt-number that identifies the interrupt source. + + - <2nd-cell>: The Linux interrupt flags containing level-sense information, + encoded as follows: + 1 = edge triggered + 4 = level-sensitive + +* Examples + +Example 1: + + /* + * Meta external trigger block + */ + intc: intc { + // This is an interrupt controller node. + interrupt-controller; + + // No address cells so that 'interrupt-map' nodes which + // reference this interrupt controller node do not need a parent + // address specifier. + #address-cells = <0>; + + // Two cells to encode interrupt sources. + #interrupt-cells = <2>; + + // Number of interrupt banks + num-banks = <2>; + + // No HWMASKEXT is available (specify on Chorus2 and Comet ES1) + no-mask; + + // Compatible with Meta hardware trigger block. + compatible = "img,meta-intc"; + }; + +Example 2: + + /* + * An interrupt generating device that is wired to a Meta external + * trigger block. + */ + uart1: uart@0x02004c00 { + // Interrupt source '5' that is level-sensitive. + // Note that there are only two cells as specified in the + // interrupt parent's '#interrupt-cells' property. + interrupts = <5 4 /* level */>; + + // The interrupt controller that this device is wired to. + interrupt-parent = <&intc>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 749d76699ead..9b2b7699da4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5040,6 +5040,8 @@ F: arch/metag/ F: Documentation/metag/ F: Documentation/devicetree/bindings/metag/ F: drivers/clocksource/metag_generic.c +F: drivers/irqchip/irq-metag.c +F: drivers/irqchip/irq-metag-ext.c MICROBLAZE ARCHITECTURE M: Michal Simek diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index 7c043491e1e3..87707efeb0a3 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -258,6 +260,9 @@ void __init init_IRQ(void) irq_ctx_init(smp_processor_id()); + init_internal_IRQ(); + init_external_IRQ(); + if (machine_desc->init_irq) machine_desc->init_irq(); } diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index bf4609a5bd9d..ff02f6b98863 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -1,4 +1,6 @@ obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o +obj-$(CONFIG_METAG) += irq-metag-ext.o +obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c new file mode 100644 index 000000000000..92c41ab4dbfd --- /dev/null +++ b/drivers/irqchip/irq-metag-ext.c @@ -0,0 +1,868 @@ +/* + * Meta External interrupt code. + * + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * External interrupts on Meta are configured at two-levels, in the CPU core and + * in the external trigger block. Interrupts from SoC peripherals are + * multiplexed onto a single Meta CPU "trigger" - traditionally it has always + * been trigger 2 (TR2). For info on how de-multiplexing happens check out + * meta_intc_irq_demux(). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define HWSTAT_STRIDE 8 +#define HWVEC_BLK_STRIDE 0x1000 + +/** + * struct meta_intc_priv - private meta external interrupt data + * @nr_banks: Number of interrupt banks + * @domain: IRQ domain for all banks of external IRQs + * @unmasked: Record of unmasked IRQs + * @levels_altered: Record of altered level bits + */ +struct meta_intc_priv { + unsigned int nr_banks; + struct irq_domain *domain; + + unsigned long unmasked[4]; + +#ifdef CONFIG_METAG_SUSPEND_MEM + unsigned long levels_altered[4]; +#endif +}; + +/* Private data for the one and only external interrupt controller */ +static struct meta_intc_priv meta_intc_priv; + +/** + * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bit offset into the IRQ's bank registers + */ +static unsigned int meta_intc_offset(irq_hw_number_t hw) +{ + return hw & 0x1f; +} + +/** + * meta_intc_bank() - Get the bank number of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bank number indicating which register the IRQ's bits are + */ +static unsigned int meta_intc_bank(irq_hw_number_t hw) +{ + return hw >> 5; +} + +/** + * meta_intc_stat_addr() - Get the address of a HWSTATEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWSTATEXT register containing the status bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWSTATEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_level_addr() - Get the address of a HWLEVELEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWLEVELEXT register containing the sense bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_level_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWLEVELEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_mask_addr() - Get the address of a HWMASKEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWMASKEXT register containing the mask bit for the + * specified hardware IRQ number + */ +static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWMASKEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_vec_addr() - Get the vector address of a hardware interrupt + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWVECEXT register controlling the core trigger to + * vector the IRQ onto + */ +static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWVEC0EXT + + HWVEC_BLK_STRIDE * meta_intc_bank(hw) + + HWVECnEXT_STRIDE * meta_intc_offset(hw)); +} + +/** + * meta_intc_startup_irq() - set up an external irq + * @data: data for the external irq to start up + * + * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and + * unmask irq, both using the appropriate callbacks. + */ +static unsigned int meta_intc_startup_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + int thread = hard_processor_id(); + + /* Perform any necessary acking. */ + if (data->chip->irq_ack) + data->chip->irq_ack(data); + + /* Wire up this interrupt to the core with HWVECxEXT. */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Perform any necessary unmasking. */ + data->chip->irq_unmask(data); + + return 0; +} + +/** + * meta_intc_shutdown_irq() - turn off an external irq + * @data: data for the external irq to turn off + * + * Mask irq using the appropriate callback and stop muxing it onto TR2. + */ +static void meta_intc_shutdown_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + /* Mask the IRQ */ + data->chip->irq_mask(data); + + /* + * Disable the IRQ at the core by removing the interrupt from + * the HW vector mapping. + */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_ack_irq() - acknowledge an external irq + * @data: data for the external irq to ack + * + * Clear down an edge interrupt in the status register. + */ +static void meta_intc_ack_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + + /* Ack the int, if it is still 'on'. + * NOTE - this only works for edge triggered interrupts. + */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * record_irq_is_masked() - record the IRQ masked so it doesn't get handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is masked (by whichever + * callback is used). It records the IRQ masked so that it doesn't get handled + * if it still shows up in the status register. + */ +static void record_irq_is_masked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/** + * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is unmasked (by whichever + * callback is used). It records the IRQ unmasked so that it gets handled if it + * shows up in the status register. + */ +static void record_irq_is_unmasked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/* + * For use by wrapper IRQ drivers + */ + +/** + * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers + * @data: data for the external irq being masked + * + * This should be called by any wrapper IRQ driver mask functions. it doesn't do + * any masking but records the IRQ as masked so that the core code knows the + * mask has taken place. It is the callers responsibility to ensure that the IRQ + * won't trigger an interrupt to the core. + */ +void meta_intc_mask_irq_simple(struct irq_data *data) +{ + record_irq_is_masked(data); +} + +/** + * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers + * @data: data for the external irq being unmasked + * + * This should be called by any wrapper IRQ driver unmask functions. it doesn't + * do any unmasking but records the IRQ as unmasked so that the core code knows + * the unmask has taken place. It is the callers responsibility to ensure that + * the IRQ can now trigger an interrupt to the core. + */ +void meta_intc_unmask_irq_simple(struct irq_data *data) +{ + record_irq_is_unmasked(data); +} + + +/** + * meta_intc_mask_irq() - mask an external irq using HWMASKEXT + * @data: data for the external irq to mask + * + * This is a default implementation of a mask function which makes use of the + * HWMASKEXT registers available in newer versions. + * + * Earlier versions without these registers should use SoC level IRQ masking + * which call the meta_intc_*_simple() functions above, or if that isn't + * available should use the fallback meta_intc_*_nomask() functions below. + */ +static void meta_intc_mask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_masked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) & ~bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT + * @data: data for the external irq to unmask + * + * This is a default implementation of an unmask function which makes use of the + * HWMASKEXT registers available on new versions. It should be paired with + * meta_intc_mask_irq() above. + */ +static void meta_intc_unmask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_unmasked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) | bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring + * @data: data for the external irq to mask + * + * This is the version of the mask function for older versions which don't have + * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is + * unvectored from the core and retriggered if necessary later. + */ +static void meta_intc_mask_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + record_irq_is_masked(data); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to edge interrupts. + */ +static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(stat_addr) & bit) { + metag_out32(bit, stat_addr); + while (!(metag_in32(stat_addr) & bit)) + metag_out32(bit, stat_addr); + } +} + +/** + * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to level interrupts. + */ +static void meta_intc_unmask_level_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Re-trigger interrupt */ + /* Writing a 1 triggers interrupt */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * meta_intc_irq_set_type() - set the type of an external irq + * @data: data for the external irq to set the type of + * @flow_type: new irq flow type + * + * Set the flow type of an external interrupt. This updates the irq chip and irq + * handler depending on whether the irq is edge or level sensitive (the polarity + * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows + * when to trigger. + */ +static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) +{ +#ifdef CONFIG_METAG_SUSPEND_MEM + struct meta_intc_priv *priv = &meta_intc_priv; +#endif + unsigned int irq = data->irq; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + unsigned long flags; + unsigned int level; + + /* update the chip/handler */ + if (flow_type & IRQ_TYPE_LEVEL_MASK) + __irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip, + handle_level_irq, NULL); + else + __irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip, + handle_edge_irq, NULL); + + /* and clear/set the bit in HWLEVELEXT */ + __global_lock2(flags); + level = metag_in32(level_addr); + if (flow_type & IRQ_TYPE_LEVEL_MASK) + level |= bit; + else + level &= ~bit; + metag_out32(level, level_addr); +#ifdef CONFIG_METAG_SUSPEND_MEM + priv->levels_altered[meta_intc_bank(hw)] |= bit; +#endif + __global_unlock2(flags); + + return 0; +} + +/** + * meta_intc_irq_demux() - external irq de-multiplexer + * @irq: the virtual interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is + * this function's job to demux this irq and figure out exactly which external + * irq needs servicing. + * + * Whilst using TR2 to detect external interrupts is a software convention it is + * (hopefully) unlikely to change. + */ +static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw; + unsigned int bank, irq_no, status; + void __iomem *stat_addr = meta_intc_stat_addr(0); + + /* + * Locate which interrupt has caused our handler to run. + */ + for (bank = 0; bank < priv->nr_banks; ++bank) { + /* Which interrupts are currently pending in this bank? */ +recalculate: + status = metag_in32(stat_addr) & priv->unmasked[bank]; + + for (hw = bank*32; status; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual + * Linux IRQ number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off external interrupts that are + * registered to be handled by the kernel. + * Other external interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } + stat_addr += HWSTAT_STRIDE; + } +} + +#ifdef CONFIG_SMP +/** + * meta_intc_set_affinity() - set the affinity for an interrupt + * @data: data for the external irq to set the affinity of + * @cpumask: cpu mask representing cpus which can handle the interrupt + * @force: whether to force (ignored) + * + * Revector the specified external irq onto a specific cpu's TR2 trigger, so + * that that cpu tends to be the one who handles it. + */ +static int meta_intc_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int cpu, thread; + + /* + * Wire up this interrupt from HWVECxEXT to the Meta core. + * + * Note that we can't wire up HWVECxEXT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + return 0; +} +#else +#define meta_intc_set_affinity NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define META_INTC_CHIP_FLAGS (IRQCHIP_MASK_ON_SUSPEND \ + | IRQCHIP_SKIP_SET_WAKE) +#else +#define META_INTC_CHIP_FLAGS 0 +#endif + +/* public edge/level irq chips which SoCs can override */ + +struct irq_chip meta_intc_edge_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_ack = meta_intc_ack_irq, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +struct irq_chip meta_intc_level_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +/** + * meta_intc_map() - map an external irq + * @d: irq domain of external trigger block + * @irq: virtual irq number + * @hw: hardware irq number within external trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured, using the HWLEVELEXT registers to determine + * edge/level flow type. These registers will have been set when the irq type is + * set (or set to a default at init time). + */ +static int meta_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + + /* Go by the current sense in the HWLEVELEXT register */ + if (metag_in32(level_addr) & bit) + irq_set_chip_and_handler(irq, &meta_intc_level_chip, + handle_level_irq); + else + irq_set_chip_and_handler(irq, &meta_intc_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops meta_intc_domain_ops = { + .map = meta_intc_map, + .xlate = irq_domain_xlate_twocell, +}; + +#ifdef CONFIG_METAG_SUSPEND_MEM + +/** + * struct meta_intc_context - suspend context + * @levels: State of HWLEVELEXT registers + * @masks: State of HWMASKEXT registers + * @vectors: State of HWVECEXT registers + * @txvecint: State of TxVECINT registers + * + * This structure stores the IRQ state across suspend. + */ +struct meta_intc_context { + u32 levels[4]; + u32 masks[4]; + u8 vectors[4*32]; + + u8 txvecint[4][4]; +}; + +/* suspend context */ +static struct meta_intc_context *meta_intc_context; + +/** + * meta_intc_suspend() - store irq state + * + * To avoid interfering with other threads we only save the IRQ state of IRQs in + * use by Linux. + */ +static int meta_intc_suspend(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit; + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return -ENOMEM; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* save mapped irqs which are enabled or have actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* save trigger vector */ + context->vectors[hw] = metag_in32(vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + /* save level state if any IRQ levels altered */ + if (priv->levels_altered[bank]) + context->levels[bank] = metag_in32(level_addr); + /* save mask state if any IRQs in use */ + if (mask) + context->masks[bank] = metag_in32(mask_addr); + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* save trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) + context->txvecint[i][j] = metag_in32(T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + __global_unlock2(flags); + + meta_intc_context = context; + return 0; +} + +/** + * meta_intc_resume() - restore saved irq state + * + * Restore the saved IRQ state and drop it. + */ +static void meta_intc_resume(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context = meta_intc_context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit, tmp; + + meta_intc_context = NULL; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* restore mapped irqs, enabled or with actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* restore trigger vector */ + metag_out32(context->vectors[hw], vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + if (mask) { + /* restore mask state */ + __global_lock2(flags); + tmp = metag_in32(mask_addr); + tmp = (tmp & ~mask) | (context->masks[bank] & mask); + metag_out32(tmp, mask_addr); + __global_unlock2(flags); + } + + mask = priv->levels_altered[bank]; + if (mask) { + /* restore level state */ + __global_lock2(flags); + tmp = metag_in32(level_addr); + tmp = (tmp & ~mask) | (context->levels[bank] & mask); + metag_out32(tmp, level_addr); + __global_unlock2(flags); + } + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* restore trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + metag_out32(context->txvecint[i][j], + T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + } + } + __global_unlock2(flags); + + kfree(context); +} + +static struct syscore_ops meta_intc_syscore_ops = { + .suspend = meta_intc_suspend, + .resume = meta_intc_resume, +}; + +static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv) +{ + register_syscore_ops(&meta_intc_syscore_ops); +} +#else +#define meta_intc_init_syscore_ops(priv) do {} while (0) +#endif + +/** + * meta_intc_init_cpu() - register with a Meta cpu + * @priv: private interrupt controller data + * @cpu: the CPU to register on + * + * Configure @cpu's TR2 irq so that we can demux external irqs. + */ +static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR2(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_chained_handler(irq, meta_intc_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * meta_intc_no_mask() - indicate lack of HWMASKEXT registers + * + * Called from SoC code (or init code below) to dynamically indicate the lack of + * HWMASKEXT registers (for example depending on some SoC revision register). + * This alters the irq mask and unmask callbacks to use the fallback + * unvectoring/retriggering technique instead of using HWMASKEXT registers. + */ +void __init meta_intc_no_mask(void) +{ + meta_intc_edge_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_edge_chip.irq_unmask = meta_intc_unmask_edge_irq_nomask; + meta_intc_level_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_level_chip.irq_unmask = meta_intc_unmask_level_irq_nomask; +} + +/** + * init_external_IRQ() - initialise the external irq controller + * + * Set up the external irq controller using device tree properties. This is + * called from init_IRQ(). + */ +int __init init_external_IRQ(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + struct device_node *node; + int ret, cpu; + u32 val; + bool no_masks = false; + + node = of_find_compatible_node(NULL, NULL, "img,meta-intc"); + if (!node) + return -ENOENT; + + /* Get number of banks */ + ret = of_property_read_u32(node, "num-banks", &val); + if (ret) { + pr_err("meta-intc: No num-banks property found\n"); + return ret; + } + if (val < 1 || val > 4) { + pr_err("meta-intc: num-banks (%u) out of range\n", val); + return -EINVAL; + } + priv->nr_banks = val; + + /* Are any mask registers present? */ + if (of_get_property(node, "no-mask", NULL)) + no_masks = true; + + /* No HWMASKEXT registers present? */ + if (no_masks) + meta_intc_no_mask(); + + /* Set up an IRQ domain */ + /* + * This is a legacy IRQ domain for now until all the platform setup code + * has been converted to devicetree. + */ + priv->domain = irq_domain_add_linear(node, priv->nr_banks*32, + &meta_intc_domain_ops, priv); + if (unlikely(!priv->domain)) { + pr_err("meta-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR2 for all cpus. */ + for_each_possible_cpu(cpu) + meta_intc_init_cpu(priv, cpu); + + /* Set up system suspend/resume callbacks */ + meta_intc_init_syscore_ops(priv); + + pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n", + priv->nr_banks*32); + + return 0; +} diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c new file mode 100644 index 000000000000..8e94d7a3b20d --- /dev/null +++ b/drivers/irqchip/irq-metag.c @@ -0,0 +1,343 @@ +/* + * Meta internal (HWSTATMETA) interrupt code. + * + * Copyright (C) 2011-2012 Imagination Technologies Ltd. + * + * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c + * The code base could be generalised/merged as a lot of the functionality is + * similar. Until this is done, we try to keep the code simple here. + */ + +#include +#include +#include + +#include +#include + +#define PERF0VECINT 0x04820580 +#define PERF1VECINT 0x04820588 +#define PERF0TRIG_OFFSET 16 +#define PERF1TRIG_OFFSET 17 + +/** + * struct metag_internal_irq_priv - private meta internal interrupt data + * @domain: IRQ domain for all internal Meta IRQs (HWSTATMETA) + * @unmasked: Record of unmasked IRQs + */ +struct metag_internal_irq_priv { + struct irq_domain *domain; + + unsigned long unmasked; +}; + +/* Private data for the one and only internal interrupt controller */ +static struct metag_internal_irq_priv metag_internal_irq_priv; + +static unsigned int metag_internal_irq_startup(struct irq_data *data); +static void metag_internal_irq_shutdown(struct irq_data *data); +static void metag_internal_irq_ack(struct irq_data *data); +static void metag_internal_irq_mask(struct irq_data *data); +static void metag_internal_irq_unmask(struct irq_data *data); +#ifdef CONFIG_SMP +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force); +#endif + +static struct irq_chip internal_irq_edge_chip = { + .name = "HWSTATMETA-IRQ", + .irq_startup = metag_internal_irq_startup, + .irq_shutdown = metag_internal_irq_shutdown, + .irq_ack = metag_internal_irq_ack, + .irq_mask = metag_internal_irq_mask, + .irq_unmask = metag_internal_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = metag_internal_irq_set_affinity, +#endif +}; + +/* + * metag_hwvec_addr - get the address of *VECINT regs of irq + * + * This function is a table of supported triggers on HWSTATMETA + * Could do with a structure, but better keep it simple. Changes + * in this code should be rare. + */ +static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw) +{ + void __iomem *addr; + + switch (hw) { + case PERF0TRIG_OFFSET: + addr = (void __iomem *)PERF0VECINT; + break; + case PERF1TRIG_OFFSET: + addr = (void __iomem *)PERF1VECINT; + break; + default: + addr = NULL; + break; + } + return addr; +} + +/* + * metag_internal_startup - setup an internal irq + * @irq: the irq to startup + * + * Multiplex interrupts for @irq onto TR1. Clear any pending + * interrupts. + */ +static unsigned int metag_internal_irq_startup(struct irq_data *data) +{ + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); + + /* Enable the interrupt by unmasking it */ + metag_internal_irq_unmask(data); + + return 0; +} + +/* + * metag_internal_irq_shutdown - turn off the irq + * @irq: the irq number to turn off + * + * Mask @irq and clear any pending interrupts. + * Stop muxing @irq onto TR1. + */ +static void metag_internal_irq_shutdown(struct irq_data *data) +{ + /* Disable the IRQ at the core by masking it. */ + metag_internal_irq_mask(data); + + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); +} + +/* + * metag_internal_irq_ack - acknowledge irq + * @irq: the irq to ack + */ +static void metag_internal_irq_ack(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + + if (metag_in32(HWSTATMETA) & bit) + metag_out32(bit, HWSTATMETA); +} + +/** + * metag_internal_irq_mask() - mask an internal irq by unvectoring + * @data: data for the internal irq to mask + * + * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core + * and retriggered if necessary later. + */ +static void metag_internal_irq_mask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = metag_hwvec_addr(hw); + + clear_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the internal irq to unmask + * + * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the + * core and retriggered if necessary. + */ +static void metag_internal_irq_unmask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + void __iomem *vec_addr = metag_hwvec_addr(hw); + unsigned int thread = hard_processor_id(); + + set_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(HWSTATMETA) & bit) { + metag_out32(bit, HWSTATMETA); + while (!(metag_in32(HWSTATMETA) & bit)) + metag_out32(bit, HWSTATMETA); + } +} + +#ifdef CONFIG_SMP +/* + * metag_internal_irq_set_affinity - set the affinity for an interrupt + */ +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + unsigned int cpu, thread; + irq_hw_number_t hw = data->hwirq; + /* + * Wire up this interrupt from *VECINT to the Meta core. + * + * Note that we can't wire up *VECINT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), + metag_hwvec_addr(hw)); + + return 0; +} +#endif + +/* + * metag_internal_irq_demux - irq de-multiplexer + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR1 when an interrupt has + * occurred. It is this function's job to demux this irq and + * figure out exactly which trigger needs servicing. + */ +static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); + irq_hw_number_t hw; + unsigned int irq_no; + u32 status; + +recalculate: + status = metag_in32(HWSTATMETA) & priv->unmasked; + + for (hw = 0; status != 0; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual Linux IRQ + * number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off interrupts that are + * registered to be handled by the kernel. + * Other interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } +} + +/** + * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number. + * @hw: Number of the internal IRQ. Must be in range. + * + * Returns: The virtual IRQ number of the Meta internal IRQ specified by + * @hw. + */ +int internal_irq_map(unsigned int hw) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + if (!priv->domain) + return -ENODEV; + return irq_create_mapping(priv->domain, hw); +} + +/** + * metag_internal_irq_init_cpu - regsister with the Meta cpu + * @cpu: the CPU to register on + * + * Configure @cpu's TR1 irq so that we can demux irqs. + */ +static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv, + int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR1(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_handler_data(irq, priv); + irq_set_chained_handler(irq, metag_internal_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * metag_internal_intc_map() - map an internal irq + * @d: irq domain of internal trigger block + * @irq: virtual irq number + * @hw: hardware irq number within internal trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured. + */ +static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* only register interrupt if it is mapped */ + if (!metag_hwvec_addr(hw)) + return -EINVAL; + + irq_set_chip_and_handler(irq, &internal_irq_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops metag_internal_intc_domain_ops = { + .map = metag_internal_intc_map, +}; + +/** + * metag_internal_irq_register - register internal IRQs + * + * Register the irq chip and handler function for all internal IRQs + */ +int __init init_internal_IRQ(void) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + unsigned int cpu; + + /* Set up an IRQ domain */ + priv->domain = irq_domain_add_linear(NULL, 32, + &metag_internal_intc_domain_ops, + priv); + if (unlikely(!priv->domain)) { + pr_err("meta-internal-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR1 for all cpus. */ + for_each_possible_cpu(cpu) + metag_internal_irq_init_cpu(priv, cpu); + + return 0; +}; diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h new file mode 100644 index 000000000000..697af0fe7c5a --- /dev/null +++ b/include/linux/irqchip/metag-ext.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_EXT_H_ +#define _LINUX_IRQCHIP_METAG_EXT_H_ + +struct irq_data; +struct platform_device; + +/* called from core irq code at init */ +int init_external_IRQ(void); + +/* + * called from SoC init_irq() callback to dynamically indicate the lack of + * HWMASKEXT registers. + */ +void meta_intc_no_mask(void); + +/* + * These allow SoCs to specialise the interrupt controller from their init_irq + * callbacks. + */ + +extern struct irq_chip meta_intc_edge_chip; +extern struct irq_chip meta_intc_level_chip; + +/* this should be called in the mask callback */ +void meta_intc_mask_irq_simple(struct irq_data *data); +/* this should be called in the unmask callback */ +void meta_intc_unmask_irq_simple(struct irq_data *data); + +#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */ diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h new file mode 100644 index 000000000000..4ebdfb3101ab --- /dev/null +++ b/include/linux/irqchip/metag.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2011 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_H_ +#define _LINUX_IRQCHIP_METAG_H_ + +#include + +#ifdef CONFIG_METAG_PERFCOUNTER_IRQS +extern int init_internal_IRQ(void); +extern int internal_irq_map(unsigned int hw); +#else +static inline int init_internal_IRQ(void) +{ + return 0; +} +static inline int internal_irq_map(unsigned int hw) +{ + return -EINVAL; +} +#endif + +#endif /* _LINUX_IRQCHIP_METAG_H_ */ -- cgit v1.2.3 From 9ca52ed979b6b45ae480a5fc56d593efb3bf16e8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 16 Oct 2012 10:16:14 +0100 Subject: mm: define VM_GROWSUP for CONFIG_METAG Commit cc2383ec06be093789469852e1fe96e1148e9a2c ("mm: introduce arch-specific vma flag VM_ARCH_1") merged in v3.7-rc1. The above commit combined several arch-specific vma flags into one, and in the process it changed the VM_GROWSUP definition to depend on specific architectures rather than CONFIG_STACK_GROWSUP. Therefore add an ifdef for CONFIG_METAG to also set VM_GROWSUP. Signed-off-by: James Hogan Cc: Konstantin Khlebnikov Cc: Andrew Morton Cc: Mel Gorman Cc: Michel Lespinasse Cc: Al Viro Cc: linux-mm@kvack.org --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 66e2f7c61e5c..44ac3dc363e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -114,6 +114,8 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 +#elif defined(CONFIG_METAG) +# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 #elif !defined(CONFIG_MMU) -- cgit v1.2.3