From a370025d1ed83b2b8202c4fefe4ea465bf22f10a Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 10 Nov 2004 21:29:32 -0800 Subject: [PATCH] remove contention on profile_lock profile_hook unconditionally takes a read lock on profile_lock if kernel profiling is enabled. The lock protects the profile_hook notifier chain from being written while it's being called. The routine profile_hook is called in a very hot path though: every timer tick on every CPU. As you can imagine, on a large system, this makes the cacheline containing profile_lock pretty hot. Since oprofile was the only user of the profile_hook, I removed the notifier chain altogether in favor of a simple function pointer with the help of John Levon. This removes all of the contention in the hot path since the variable is very seldom written and simplifies things a little to boot. Acked-by: John Levon Signed-off-by: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index a22f4a15c981..026969a5595c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -53,13 +53,13 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -int register_profile_notifier(struct notifier_block * nb); -int unregister_profile_notifier(struct notifier_block * nb); +int register_timer_hook(int (*hook)(struct pt_regs *)); +void unregister_timer_hook(int (*hook)(struct pt_regs *)); -struct pt_regs; +/* Timer based profiling hook */ +extern int (*timer_hook)(struct pt_regs *); -/* profiling hook activated on each timer interrupt */ -void profile_hook(struct pt_regs * regs); +struct pt_regs; #else @@ -87,18 +87,16 @@ static inline int 
profile_event_unregister(enum profile_type t, struct notifier_ #define profile_handoff_task(a) (0) #define profile_munmap(a) do { } while (0) -static inline int register_profile_notifier(struct notifier_block * nb) +static inline int register_timer_hook(int (*hook)(struct pt_regs *)) { return -ENOSYS; } -static inline int unregister_profile_notifier(struct notifier_block * nb) +static inline void unregister_timer_hook(int (*hook)(struct pt_regs *)) { - return -ENOSYS; + return; } -#define profile_hook(regs) do { } while (0) - #endif /* CONFIG_PROFILING */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From c62ba94c38516675f02007826a4aab325125aa8d Mon Sep 17 00:00:00 2001 From: Suparna Bhattacharya Date: Wed, 10 Nov 2004 21:29:50 -0800 Subject: [PATCH] Fix O_SYNC speedup for generic_file_write_nolock The O_SYNC speedup patches missed the generic_file_xxx_nolock cases, which means that pages weren't actually getting sync'ed in those cases. This patch fixes that. Signed-off-by: Suparna Bhattacharya Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 ++ mm/filemap.c | 73 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1c9994fe2acc..4ab519ad9f55 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -106,6 +106,8 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, size_t count); +int sync_page_range_nolock(struct inode *inode, struct address_space + *mapping, loff_t pos, size_t count); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/filemap.c b/mm/filemap.c index 3f169349c10f..bf87992991fb 100644 --- a/mm/filemap.c +++ 
b/mm/filemap.c @@ -283,6 +283,29 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, } EXPORT_SYMBOL(sync_page_range); +/* + * Note: Holding i_sem across sync_page_range_nolock is not a good idea + * as it forces O_SYNC writers to different parts of the same file + * to be serialised right until io completion. + */ +int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, + loff_t pos, size_t count) +{ + pgoff_t start = pos >> PAGE_CACHE_SHIFT; + pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; + int ret; + + if (mapping->backing_dev_info->memory_backed || !count) + return 0; + ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); + if (ret == 0) + ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); + if (ret == 0) + ret = wait_on_page_writeback_range(mapping, start, end); + return ret; +} +EXPORT_SYMBOL(sync_page_range_nolock); + /** * filemap_fdatawait - walk the list of under-writeback pages of the given * address space and wait for all of them. @@ -1826,7 +1849,6 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i } return 0; } - EXPORT_SYMBOL(generic_write_checks); ssize_t @@ -1864,7 +1886,6 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, written = -EIOCBQUEUED; return written; } - EXPORT_SYMBOL(generic_file_direct_write); ssize_t @@ -1986,11 +2007,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, pagevec_lru_add(&lru_pvec); return written ? written : status; } - EXPORT_SYMBOL(generic_file_buffered_write); ssize_t -generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, +__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) { struct file *file = iocb->ki_filp; @@ -2063,9 +2083,44 @@ out: current->backing_dev_info = NULL; return written ? 
written : err; } - EXPORT_SYMBOL(generic_file_aio_write_nolock); +ssize_t +generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + loff_t pos = *ppos; + + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); + + if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + err = sync_page_range_nolock(inode, mapping, pos, ret); + if (err < 0) + ret = err; + } + return ret; +} + +ssize_t +__generic_file_write_nolock(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, file); + ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + return ret; +} + ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) @@ -2079,7 +2134,6 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov, ret = wait_on_sync_kiocb(&kiocb); return ret; } - EXPORT_SYMBOL(generic_file_write_nolock); ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, @@ -2120,7 +2174,7 @@ ssize_t generic_file_write(struct file *file, const char __user *buf, .iov_len = count }; down(&inode->i_sem); - ret = generic_file_write_nolock(file, &local_iov, 1, ppos); + ret = __generic_file_write_nolock(file, &local_iov, 1, ppos); up(&inode->i_sem); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { @@ -2146,7 +2200,6 @@ ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, ret = wait_on_sync_kiocb(&kiocb); return ret; } - EXPORT_SYMBOL(generic_file_readv); ssize_t generic_file_writev(struct file *file, const struct iovec *iov, @@ -2157,7 +2210,7 @@ ssize_t generic_file_writev(struct file *file, 
const struct iovec *iov, ssize_t ret; down(&inode->i_sem); - ret = generic_file_write_nolock(file, iov, nr_segs, ppos); + ret = __generic_file_write_nolock(file, iov, nr_segs, ppos); up(&inode->i_sem); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { @@ -2169,7 +2222,6 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov, } return ret; } - EXPORT_SYMBOL(generic_file_writev); /* @@ -2192,5 +2244,4 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, } return retval; } - EXPORT_SYMBOL_GPL(generic_file_direct_IO); -- cgit v1.2.3 From fc0c58f5fc1f240903dea998d8239203f95e9b2d Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 10 Nov 2004 21:41:26 -0800 Subject: [PATCH] "Bad" naming of structures and functions in ext3 reservation code The patch below renames struct reserve_window_node* and rsv_window_add() function to struct ext3_reserve_window_node* and ext3_rsv_window_add(). This eases the task of having several ext3-derived filesystem drivers (with different capabilities) in kernel. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 56 +++++++++++++++++++++++----------------------- fs/ext3/super.c | 2 +- include/linux/ext3_fs.h | 2 +- include/linux/ext3_fs_i.h | 8 +++---- include/linux/ext3_fs_sb.h | 2 +- 5 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index d51c5ce3c72c..071237559c99 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -116,7 +116,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose, const char *fn) { struct rb_node *n; - struct reserve_window_node *rsv, *prev; + struct ext3_reserve_window_node *rsv, *prev; int bad; restart: @@ -126,7 +126,7 @@ restart: printk("Block Allocation Reservation Windows Map (%s):\n", fn); while (n) { - rsv = list_entry(n, struct reserve_window_node, rsv_node); + rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " "start: %d, end: %d\n", @@ -162,7 +162,7 @@ restart: #endif static int -goal_in_my_reservation(struct reserve_window *rsv, int goal, +goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, unsigned int group, struct super_block * sb) { unsigned long group_first_block, group_last_block; @@ -185,17 +185,17 @@ goal_in_my_reservation(struct reserve_window *rsv, int goal, * if the goal is not in any window. * Returns NULL if there are no windows or if all windows start after the goal. 
*/ -static struct reserve_window_node *search_reserve_window(struct rb_root *root, - unsigned long goal) +static struct ext3_reserve_window_node * +search_reserve_window(struct rb_root *root, unsigned long goal) { struct rb_node *n = root->rb_node; - struct reserve_window_node *rsv; + struct ext3_reserve_window_node *rsv; if (!n) return NULL; do { - rsv = rb_entry(n, struct reserve_window_node, rsv_node); + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); if (goal < rsv->rsv_start) n = n->rb_left; @@ -212,13 +212,13 @@ static struct reserve_window_node *search_reserve_window(struct rb_root *root, */ if (rsv->rsv_start > goal) { n = rb_prev(&rsv->rsv_node); - rsv = rb_entry(n, struct reserve_window_node, rsv_node); + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); } return rsv; } -void rsv_window_add(struct super_block *sb, - struct reserve_window_node *rsv) +void ext3_rsv_window_add(struct super_block *sb, + struct ext3_reserve_window_node *rsv) { struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; struct rb_node *node = &rsv->rsv_node; @@ -226,12 +226,12 @@ void rsv_window_add(struct super_block *sb, struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct reserve_window_node *this; + struct ext3_reserve_window_node *this; while (*p) { parent = *p; - this = rb_entry(parent, struct reserve_window_node, rsv_node); + this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); if (start < this->rsv_start) p = &(*p)->rb_left; @@ -246,7 +246,7 @@ void rsv_window_add(struct super_block *sb, } static void rsv_window_remove(struct super_block *sb, - struct reserve_window_node *rsv) + struct ext3_reserve_window_node *rsv) { rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; @@ -254,7 +254,7 @@ static void rsv_window_remove(struct super_block *sb, rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); } -static inline int rsv_is_empty(struct reserve_window 
*rsv) +static inline int rsv_is_empty(struct ext3_reserve_window *rsv) { /* a valid reservation end block could not be 0 */ return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); @@ -263,7 +263,7 @@ static inline int rsv_is_empty(struct reserve_window *rsv) void ext3_discard_reservation(struct inode *inode) { struct ext3_inode_info *ei = EXT3_I(inode); - struct reserve_window_node *rsv = &ei->i_rsv_window; + struct ext3_reserve_window_node *rsv = &ei->i_rsv_window; spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; if (!rsv_is_empty(&rsv->rsv_window)) { @@ -611,7 +611,7 @@ claim_block(spinlock_t *lock, int block, struct buffer_head *bh) */ static int ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, int goal, struct reserve_window *my_rsv) + struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv) { int group_first_block, start, end; @@ -711,13 +711,13 @@ fail_access: * on succeed, it returns the reservation window to be appended to. * failed, return NULL. 
*/ -static struct reserve_window_node *find_next_reservable_window( - struct reserve_window_node *search_head, +static struct ext3_reserve_window_node *find_next_reservable_window( + struct ext3_reserve_window_node *search_head, unsigned long size, int *start_block, int last_block) { struct rb_node *next; - struct reserve_window_node *rsv, *prev; + struct ext3_reserve_window_node *rsv, *prev; int cur; /* TODO: make the start of the reservation window byte-aligned */ @@ -745,7 +745,7 @@ static struct reserve_window_node *find_next_reservable_window( prev = rsv; next = rb_next(&rsv->rsv_node); - rsv = list_entry(next, struct reserve_window_node, rsv_node); + rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); /* * Reached the last reservation, we can just append to the @@ -812,15 +812,15 @@ static struct reserve_window_node *find_next_reservable_window( * @group: the group we are trying to allocate in * @bitmap_bh: the block group block bitmap */ -static int alloc_new_reservation(struct reserve_window_node *my_rsv, +static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, int goal, struct super_block *sb, unsigned int group, struct buffer_head *bitmap_bh) { - struct reserve_window_node *search_head; + struct ext3_reserve_window_node *search_head; int group_first_block, group_end_block, start_block; int first_free_block; int reservable_space_start; - struct reserve_window_node *prev_rsv; + struct ext3_reserve_window_node *prev_rsv; struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; unsigned long size; @@ -941,7 +941,7 @@ found_rsv_window: my_rsv->rsv_end = my_rsv->rsv_start + size - 1; atomic_set(&my_rsv->rsv_alloc_hit, 0); if (my_rsv != prev_rsv) { - rsv_window_add(sb, my_rsv); + ext3_rsv_window_add(sb, my_rsv); } return 0; /* succeed */ failed: @@ -979,7 +979,7 @@ failed: static int ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, unsigned int group, struct buffer_head *bitmap_bh, - int goal, struct 
reserve_window_node * my_rsv, + int goal, struct ext3_reserve_window_node * my_rsv, int *errp) { spinlock_t *rsv_lock; @@ -1038,7 +1038,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, * then we could go to allocate from the reservation window directly. */ while (1) { - struct reserve_window rsv_copy; + struct ext3_reserve_window rsv_copy; unsigned int seq; do { @@ -1146,8 +1146,8 @@ int ext3_new_block(handle_t *handle, struct inode *inode, struct ext3_group_desc *gdp; struct ext3_super_block *es; struct ext3_sb_info *sbi; - struct reserve_window_node *my_rsv = NULL; - struct reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; + struct ext3_reserve_window_node *my_rsv = NULL; + struct ext3_reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; unsigned short windowsz = 0; #ifdef EXT3FS_DEBUG static int goal_hits, goal_attempts; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 35500533622d..34c4744813e1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1493,7 +1493,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; atomic_set(&sbi->s_rsv_window_head.rsv_alloc_hit, 0); atomic_set(&sbi->s_rsv_window_head.rsv_goal_size, 0); - rsv_window_add(sb, &sbi->s_rsv_window_head); + ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); /* * set up enough so that it can read an inode diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d44abc7bd33a..5e0e52b0fc7e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -722,7 +722,7 @@ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); -extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv); +extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); /* 
dir.c */ extern int ext3_check_dir_entry(const char *, struct inode *, diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index c549633532de..328cd40c5ea4 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -20,17 +20,17 @@ #include #include -struct reserve_window { +struct ext3_reserve_window { __u32 _rsv_start; /* First byte reserved */ __u32 _rsv_end; /* Last byte reserved or 0 */ }; -struct reserve_window_node { +struct ext3_reserve_window_node { struct rb_node rsv_node; atomic_t rsv_goal_size; atomic_t rsv_alloc_hit; seqlock_t rsv_seqlock; - struct reserve_window rsv_window; + struct ext3_reserve_window rsv_window; }; #define rsv_start rsv_window._rsv_start @@ -76,7 +76,7 @@ struct ext3_inode_info { */ __u32 i_next_alloc_goal; /* block reservation window */ - struct reserve_window_node i_rsv_window; + struct ext3_reserve_window_node i_rsv_window; __u32 i_dir_start_lookup; #ifdef CONFIG_EXT3_FS_XATTR diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index ac5fb22c5b7c..f61309c81cc4 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -62,7 +62,7 @@ struct ext3_sb_info { /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; - struct reserve_window_node s_rsv_window_head; + struct ext3_reserve_window_node s_rsv_window_head; /* Journaling */ struct inode * s_journal_inode; -- cgit v1.2.3 From 8d5b3b971205ac5fc839f7c55a588463e36c53ef Mon Sep 17 00:00:00 2001 From: Antonino Daplas Date: Wed, 10 Nov 2004 21:47:11 -0800 Subject: [PATCH] fbdev: Introduce FB_BLANK_* constants The VESA_* constants in fb.h used for power management of the display are confusing to use. The constants seem to be meant for userspace, because within the kernel (vt and fbdev), the constants have to be incremented by 1. 
Implementation of fb_blank() varies from driver to driver: - if-else on blank/!blank - switch case on hardcoded numbers - switch case on the constants + 1 - switch -1, case on constants as is - switch case on the constants as is (broken) To make usage clearer, new constants are defined in fb.h: FB_BLANK_UNBLANK = VESA_NO_BLANKING = 0; FB_BLANK_NORMAL = VESA_NO_BLANKING + 1 = 1; FB_BLANK_VSYNC_SUSPEND = VESA_VSYNC_SUSPEND + 1 = 2; FB_BLANK_HSYNC_SUSPEND = VESA_HSYNC_SUSPEND + 1 = 3; FB_BLANK_POWERDOWN = VESA_POWERDOWN + 1 = 4; Other changes: - generic blanking code in fbcon.c (for drivers without an fb_blank hook) which either sets the palette to all black, or clears the screen with black. - make fb_display an unexported symbol Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/console/fbcon.c | 87 +++++++++++++++++++++++++++---------------- drivers/video/fbmem.c | 5 +-- include/linux/fb.h | 18 +++++++++ 3 files changed, 75 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 8b528597f74f..0235d9fb1d79 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -212,7 +212,7 @@ static inline int get_color(struct vc_data *vc, struct fb_info *info, int depth = fb_get_color_depth(info); int color = 0; - if (!info->fbops->fb_blank && console_blanked) { + if (console_blanked) { unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; c = vc->vc_video_erase_char & charmask; @@ -229,7 +229,7 @@ static inline int get_color(struct vc_data *vc, struct fb_info *info, int fg = (info->fix.visual != FB_VISUAL_MONO01) ? 1 : 0; int bg = (info->fix.visual != FB_VISUAL_MONO01) ? 0 : 1; - if (!info->fbops->fb_blank && console_blanked) + if (console_blanked) fg = bg; color = (is_fg) ? 
fg : bg; @@ -1993,17 +1993,52 @@ static int fbcon_switch(struct vc_data *vc) return 1; } +static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, + int blank) +{ + if (blank) { + if (info->fix.visual == FB_VISUAL_DIRECTCOLOR || + info->fix.visual == FB_VISUAL_PSEUDOCOLOR) { + struct fb_cmap cmap; + u16 *black; + + black = kmalloc(sizeof(u16) * info->cmap.len, + GFP_KERNEL); + if (black) { + memset(black, 0, info->cmap.len * sizeof(u16)); + cmap.red = cmap.green = cmap.blue = black; + cmap.transp = info->cmap.transp ? black : NULL; + cmap.start = info->cmap.start; + cmap.len = info->cmap.len; + fb_set_cmap(&cmap, info); + } + + kfree(black); + } else { + unsigned short charmask = vc->vc_hi_font_mask ? + 0x1ff : 0xff; + unsigned short oldc; + + oldc = vc->vc_video_erase_char; + vc->vc_video_erase_char &= charmask; + fbcon_clear(vc, 0, 0, vc->vc_rows, vc->vc_cols); + vc->vc_video_erase_char = oldc; + } + } else { + if (info->fix.visual == FB_VISUAL_DIRECTCOLOR || + info->fix.visual == FB_VISUAL_PSEUDOCOLOR) + fb_set_cmap(&info->cmap, info); + } +} + static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) { - unsigned short charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; - int retval = 0; + int active = !fbcon_is_inactive(vc, info); if (mode_switch) { struct fb_var_screeninfo var = info->var; - /* * HACK ALERT: Some hardware will require reinitializion at this stage, * others will require it to be done as late as possible. @@ -2020,36 +2055,25 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE; fb_set_var(info, &var); } - - return 0; } - fbcon_cursor(vc, blank ? 
CM_ERASE : CM_DRAW); - ops->cursor_flash = (!blank); + if (active) { + int ret = -1; - if (!info->fbops->fb_blank) { - if (blank) { - unsigned short oldc; - u_int height; - u_int y_break; + fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW); + ops->cursor_flash = (!blank); - oldc = vc->vc_video_erase_char; - vc->vc_video_erase_char &= charmask; - height = vc->vc_rows; - y_break = p->vrows - p->yscroll; - if (height > y_break) { - fbcon_clear(vc, 0, 0, y_break, vc->vc_cols); - fbcon_clear(vc, y_break, 0, height - y_break, - vc->vc_cols); - } else - fbcon_clear(vc, 0, 0, height, vc->vc_cols); - vc->vc_video_erase_char = oldc; - } else if (!fbcon_is_inactive(vc, info)) - update_screen(vc->vc_num); - } else if (vt_cons[vc->vc_num]->vc_mode == KD_TEXT) - retval = info->fbops->fb_blank(blank, info); + if (info->fbops->fb_blank) + ret = info->fbops->fb_blank(blank, info); - return retval; + if (ret) + fbcon_generic_blank(vc, info, blank); + + if (!blank) + update_screen(vc->vc_num); + } + + return 0; } static void fbcon_free_font(struct display *p) @@ -2786,7 +2810,6 @@ module_exit(fb_console_exit); * Visible symbols for modules */ -EXPORT_SYMBOL(fb_display); EXPORT_SYMBOL(fb_con); MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index cb6fe2187b54..a6fbb981108a 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -745,9 +745,8 @@ fb_blank(struct fb_info *info, int blank) { int err = -EINVAL; - /* Workaround for broken X servers */ - if (blank > VESA_POWERDOWN) - blank = VESA_POWERDOWN; + if (blank > FB_BLANK_POWERDOWN) + blank = FB_BLANK_POWERDOWN; if (info->fbops->fb_blank) err = info->fbops->fb_blank(blank, info); diff --git a/include/linux/fb.h b/include/linux/fb.h index d8f4789dce45..c1bb3123bab3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -258,6 +258,24 @@ struct fb_con2fbmap { #define VESA_HSYNC_SUSPEND 2 #define VESA_POWERDOWN 3 + +enum { + /* screen: unblanked, hsync: on, vsync: on */ + FB_BLANK_UNBLANK = 
VESA_NO_BLANKING, + + /* screen: blanked, hsync: on, vsync: on */ + FB_BLANK_NORMAL = VESA_NO_BLANKING + 1, + + /* screen: blanked, hsync: on, vsync: off */ + FB_BLANK_VSYNC_SUSPEND = VESA_VSYNC_SUSPEND + 1, + + /* screen: blanked, hsync: off, vsync: on */ + FB_BLANK_HSYNC_SUSPEND = VESA_HSYNC_SUSPEND + 1, + + /* screen: blanked, hsync: off, vsync: off */ + FB_BLANK_POWERDOWN = VESA_POWERDOWN + 1 +}; + #define FB_VBLANK_VBLANKING 0x001 /* currently in a vertical blank */ #define FB_VBLANK_HBLANKING 0x002 /* currently in a horizontal blank */ #define FB_VBLANK_HAVE_VBLANK 0x004 /* vertical blanks can be detected */ -- cgit v1.2.3 From 54532fdd7db14f3370a1f9c0ea48204558da104b Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 10 Nov 2004 21:48:15 -0800 Subject: [PATCH] md: fix problem with md/linear for devices larger than 2 terabytes Some size fields were "int" instead of "sector_t". Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/linear.c | 8 +++++--- include/linux/raid/linear.h | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 06f1e7bec105..6fb445a5c4f4 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -116,7 +116,8 @@ static int linear_run (mddev_t *mddev) linear_conf_t *conf; struct linear_hash *table; mdk_rdev_t *rdev; - int size, i, nb_zone, cnt; + int i, nb_zone, cnt; + sector_t size; unsigned int curr_offset; struct list_head *tmp; @@ -265,10 +266,11 @@ static int linear_make_request (request_queue_t *q, struct bio *bio) char b[BDEVNAME_SIZE]; printk("linear_make_request: Block %llu out of bounds on " - "dev %s size %ld offset %ld\n", + "dev %s size %llu offset %llu\n", (unsigned long long)block, bdevname(tmp_dev->rdev->bdev, b), - tmp_dev->size, tmp_dev->offset); + (unsigned long long)tmp_dev->size, + (unsigned long long)tmp_dev->offset); bio_io_error(bio, bio->bi_size); return 0; } diff --git 
a/include/linux/raid/linear.h b/include/linux/raid/linear.h index 70afc1dc8b43..e951b2bb9cdf 100644 --- a/include/linux/raid/linear.h +++ b/include/linux/raid/linear.h @@ -5,8 +5,8 @@ struct dev_info { mdk_rdev_t *rdev; - unsigned long size; - unsigned long offset; + sector_t size; + sector_t offset; }; typedef struct dev_info dev_info_t; -- cgit v1.2.3 From 854d7a6fa60eaa67ef26fb0559b01a2cfb2fbcae Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 10 Nov 2004 21:48:51 -0800 Subject: [PATCH] md: delete unplug timer before shutting down md array As the unplug timer can potentially fire at any time, and it accesses data that is released by the md ->stop function, we need to del_timer_sync before releasing that data. (After much discussion, we created blk_sync_queue() for this) Signed-off-by: Neil Brown Contributions from Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/ll_rw_blk.c | 26 ++++++++++++++++++++++---- drivers/md/linear.c | 1 + drivers/md/multipath.c | 1 + drivers/md/raid0.c | 1 + drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + drivers/md/raid5.c | 1 + drivers/md/raid6main.c | 1 + include/linux/blkdev.h | 1 + 9 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 12958fa9ea82..7aac17054084 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1357,9 +1357,29 @@ void blk_stop_queue(request_queue_t *q) blk_remove_plug(q); set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); } - EXPORT_SYMBOL(blk_stop_queue); +/** + * blk_sync_queue - cancel any pending callbacks on a queue + * @q: the queue + * + * Description: + * The block layer may perform asynchronous callback activity + * on a queue, such as calling the unplug function after a timeout. + * A block device may call blk_sync_queue to ensure that any + * such activity is cancelled, thus allowing it to release resources + * the the callbacks might use. 
The caller must already have made sure + * that its ->make_request_fn will not re-add plugging prior to calling + * this function. + * + */ +void blk_sync_queue(struct request_queue *q) +{ + del_timer_sync(&q->unplug_timer); + kblockd_flush(); +} +EXPORT_SYMBOL(blk_sync_queue); + /** * blk_run_queue - run a single device queue * @q: The queue to run @@ -1373,7 +1393,6 @@ void blk_run_queue(struct request_queue *q) q->request_fn(q); spin_unlock_irqrestore(q->queue_lock, flags); } - EXPORT_SYMBOL(blk_run_queue); /** @@ -1401,8 +1420,7 @@ void blk_cleanup_queue(request_queue_t * q) if (q->elevator) elevator_exit(q->elevator); - del_timer_sync(&q->unplug_timer); - kblockd_flush(); + blk_sync_queue(q); if (rl->rq_pool) mempool_destroy(rl->rq_pool); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 6fb445a5c4f4..09e32023083c 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -231,6 +231,7 @@ static int linear_stop (mddev_t *mddev) { linear_conf_t *conf = mddev_to_conf(mddev); + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->hash_table); kfree(conf); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index cbce9d22093e..309398219556 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -547,6 +547,7 @@ static int multipath_stop (mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ mempool_destroy(conf->pool); kfree(conf->multipaths); kfree(conf); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 76c59f160447..e7d934eca06f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -385,6 +385,7 @@ static int raid0_stop (mddev_t *mddev) { raid0_conf_t *conf = mddev_to_conf(mddev); + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree (conf->hash_table); conf->hash_table = NULL; kfree (conf->strip_zone); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 
6fc90e363644..4e25ebd53385 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1293,6 +1293,7 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); if (conf->mirrors) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9596358572e3..271566095306 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1744,6 +1744,7 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); if (conf->mirrors) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 90dd9f8a596a..1dceb098a7fb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1707,6 +1707,7 @@ static int stop (mddev_t *mddev) mddev->thread = NULL; shrink_stripes(conf); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf); mddev->private = NULL; return 0; diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index ac588c53c3cb..9a27f8f88a91 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1878,6 +1878,7 @@ static int stop (mddev_t *mddev) mddev->thread = NULL; shrink_stripes(conf); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf); mddev->private = NULL; return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f04b0e223d82..1b7dc44bf3c1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -522,6 +522,7 @@ extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *) extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); extern void 
blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); +extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); -- cgit v1.2.3 From f5711563c37aa28daf75bf2ba99fbea2d5d52f1e Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 10 Nov 2004 21:49:10 -0800 Subject: [PATCH] md: "Faulty" personality The 'faulty' personality provides a layer over any block device in which errors may be synthesised. A variety of errors are possible including transient and persistent read and write errors, and read errors that persist until the next write. The error mode can be changed on a live array. Accessing this personality requires mdadm 2.8.0 or later. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/Kconfig | 9 ++ drivers/md/Makefile | 1 + drivers/md/faulty.c | 343 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/md.c | 13 +- include/linux/raid/md_k.h | 7 +- 5 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 drivers/md/faulty.c (limited to 'include/linux') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2bf58c928b06..72d0bf2b7a94 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -164,6 +164,15 @@ config MD_MULTIPATH If unsure, say N. +config MD_FAULTY + tristate "Faulty test module for MD" + depends on BLK_DEV_MD + help + The "faulty" module allows for a block device that occasionally returns + read or write errors. It is useful for testing. + + If unsure, say N.
+ config BLK_DEV_DM tristate "Device mapper support" depends on MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index e1b176505438..b9e64e8764b2 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_MD_RAID10) += raid10.o obj-$(CONFIG_MD_RAID5) += raid5.o xor.o obj-$(CONFIG_MD_RAID6) += raid6.o xor.o obj-$(CONFIG_MD_MULTIPATH) += multipath.o +obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BLK_DEV_MD) += md.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c new file mode 100644 index 000000000000..0248f8e7eac0 --- /dev/null +++ b/drivers/md/faulty.c @@ -0,0 +1,343 @@ +/* + * faulty.c : Multiple Devices driver for Linux + * + * Copyright (C) 2004 Neil Brown + * + * faulty-device-simulator personality for md + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +/* + * The "faulty" personality causes some requests to fail. + * + * Possible failure modes are: + * reads fail "randomly" but succeed on retry + * writes fail "randomly" but succeed on retry + * reads for some address fail and then persist until a write + * reads for some address fail and then persist irrespective of write + * writes for some address fail and persist + * all writes fail + * + * Different modes can be active at a time, but only + * one can be set at array creation. Others can be added later. + * A mode can be one-shot or recurrent with the recurrence being + * once in every N requests. + * The bottom 5 bits of the "layout" indicate the mode.
The + * remainder indicate a period, or 0 for one-shot. + * + * There is an implementation limit on the number of concurrently + * persisting-faulty blocks. When a new fault is requested that would + * exceed the limit, it is ignored. + * All current faults can be cleared using a layout of "0". + * + * Requests are always sent to the device. If they are to fail, + * we clone the bio and insert a new bi_end_io into the chain. + */ + +#define WriteTransient 0 +#define ReadTransient 1 +#define WritePersistent 2 +#define ReadPersistent 3 +#define WriteAll 4 /* doesn't go to device */ +#define ReadFixable 5 +#define Modes 6 + +#define ClearErrors 31 +#define ClearFaults 30 + +#define AllPersist 100 /* internal use only */ +#define NoPersist 101 + +#define ModeMask 0x1f +#define ModeShift 5 + +#define MaxFault 50 +#include <linux/raid/md.h> + + +static int faulty_fail(struct bio *bio, unsigned int bytes_done, int error) +{ + struct bio *b = bio->bi_private; + + b->bi_size = bio->bi_size; + b->bi_sector = bio->bi_sector; + + if (bio->bi_size == 0) + bio_put(bio); + + clear_bit(BIO_UPTODATE, &b->bi_flags); + return (b->bi_end_io)(b, bytes_done, -EIO); +} + +typedef struct faulty_conf { + int period[Modes]; + atomic_t counters[Modes]; + sector_t faults[MaxFault]; + int modes[MaxFault]; + int nfaults; + mdk_rdev_t *rdev; +} conf_t; + +static int check_mode(conf_t *conf, int mode) +{ + if (conf->period[mode] == 0 && + atomic_read(&conf->counters[mode]) <= 0) + return 0; /* no failure, no decrement */ + + + if (atomic_dec_and_test(&conf->counters[mode])) { + if (conf->period[mode]) + atomic_set(&conf->counters[mode], conf->period[mode]); + return 1; + } + return 0; +} + +static int check_sector(conf_t *conf, sector_t start, sector_t end, int dir) +{ + /* If we find a ReadFixable sector, we fix it ... */ + int i; + for (i=0; i<conf->nfaults; i++) + if (conf->faults[i] >= start && + conf->faults[i] < end) { + /* found it ...
*/ + switch (conf->modes[i] * 2 + dir) { + case WritePersistent*2+WRITE: return 1; + case ReadPersistent*2+READ: return 1; + case ReadFixable*2+READ: return 1; + case ReadFixable*2+WRITE: + conf->modes[i] = NoPersist; + return 0; + case AllPersist*2+READ: + case AllPersist*2+WRITE: return 1; + default: + return 0; + } + } + return 0; +} + +static void add_sector(conf_t *conf, sector_t start, int mode) +{ + int i; + int n = conf->nfaults; + for (i=0; i<conf->nfaults; i++) + if (conf->faults[i] == start) { + switch(mode) { + case NoPersist: conf->modes[i] = mode; return; + case WritePersistent: + if (conf->modes[i] == ReadPersistent || + conf->modes[i] == ReadFixable) + conf->modes[i] = AllPersist; + else + conf->modes[i] = WritePersistent; + return; + case ReadPersistent: + if (conf->modes[i] == WritePersistent) + conf->modes[i] = AllPersist; + else + conf->modes[i] = ReadPersistent; + return; + case ReadFixable: + if (conf->modes[i] == WritePersistent || + conf->modes[i] == ReadPersistent) + conf->modes[i] = AllPersist; + else + conf->modes[i] = ReadFixable; + return; + } + } else if (conf->modes[i] == NoPersist) + n = i; + + if (n >= MaxFault) + return; + conf->faults[n] = start; + conf->modes[n] = mode; + if (conf->nfaults == n) + conf->nfaults = n+1; +} + +static int make_request(request_queue_t *q, struct bio *bio) +{ + mddev_t *mddev = q->queuedata; + conf_t *conf = (conf_t*)mddev->private; + int failit = 0; + + if (bio->bi_rw & 1) { + /* write request */ + if (atomic_read(&conf->counters[WriteAll])) { + /* special case - don't decrement, don't generic_make_request, + * just fail immediately + */ + bio_endio(bio, bio->bi_size, -EIO); + return 0; + } + + if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9), + WRITE)) + failit = 1; + if (check_mode(conf, WritePersistent)) { + add_sector(conf, bio->bi_sector, WritePersistent); + failit = 1; + } + if (check_mode(conf, WriteTransient)) + failit = 1; + } else { + /* read request */ + if
(check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9), + READ)) + failit = 1; + if (check_mode(conf, ReadTransient)) + failit = 1; + if (check_mode(conf, ReadPersistent)) { + add_sector(conf, bio->bi_sector, ReadPersistent); + failit = 1; + } + if (check_mode(conf, ReadFixable)) { + add_sector(conf, bio->bi_sector, ReadFixable); + failit = 1; + } + } + if (failit) { + struct bio *b = bio_clone(bio, GFP_NOIO); + b->bi_bdev = conf->rdev->bdev; + b->bi_private = bio; + b->bi_end_io = faulty_fail; + generic_make_request(b); + return 0; + } else { + bio->bi_bdev = conf->rdev->bdev; + return 1; + } +} + +static void status(struct seq_file *seq, mddev_t *mddev) +{ + conf_t *conf = (conf_t*)mddev->private; + int n; + + if ((n=atomic_read(&conf->counters[WriteTransient])) != 0) + seq_printf(seq, " WriteTransient=%d(%d)", + n, conf->period[WriteTransient]); + + if ((n=atomic_read(&conf->counters[ReadTransient])) != 0) + seq_printf(seq, " ReadTransient=%d(%d)", + n, conf->period[ReadTransient]); + + if ((n=atomic_read(&conf->counters[WritePersistent])) != 0) + seq_printf(seq, " WritePersistent=%d(%d)", + n, conf->period[WritePersistent]); + + if ((n=atomic_read(&conf->counters[ReadPersistent])) != 0) + seq_printf(seq, " ReadPersistent=%d(%d)", + n, conf->period[ReadPersistent]); + + + if ((n=atomic_read(&conf->counters[ReadFixable])) != 0) + seq_printf(seq, " ReadFixable=%d(%d)", + n, conf->period[ReadFixable]); + + if ((n=atomic_read(&conf->counters[WriteAll])) != 0) + seq_printf(seq, " WriteAll"); + + seq_printf(seq, " nfaults=%d", conf->nfaults); +} + + +static int reconfig(mddev_t *mddev, int layout, int chunk_size) +{ + int mode = layout & ModeMask; + int count = layout >> ModeShift; + conf_t *conf = mddev->private; + + if (chunk_size != -1) + return -EINVAL; + + /* new layout */ + if (mode == ClearFaults) + conf->nfaults = 0; + else if (mode == ClearErrors) { + int i; + for (i=0 ; i < Modes ; i++) { + conf->period[i] = 0; + 
atomic_set(&conf->counters[i], 0); + } + } else if (mode < Modes) { + conf->period[mode] = count; + if (!count) count++; + atomic_set(&conf->counters[mode], count); + } else + return -EINVAL; + mddev->layout = -1; /* makes sure further changes come through */ + return 0; +} + +static int run(mddev_t *mddev) +{ + mdk_rdev_t *rdev; + struct list_head *tmp; + int i; + + conf_t *conf = kmalloc(sizeof(*conf), GFP_KERNEL); + + for (i=0; i<Modes; i++) { + atomic_set(&conf->counters[i], 0); + conf->period[i] = 0; + } + conf->nfaults = 0; + + ITERATE_RDEV(mddev, rdev, tmp) + conf->rdev = rdev; + + mddev->array_size = mddev->size; + mddev->private = conf; + + reconfig(mddev, mddev->layout, -1); + + return 0; +} + +static int stop(mddev_t *mddev) +{ + conf_t *conf = (conf_t *)mddev->private; + + kfree(conf); + mddev->private = NULL; + return 0; +} + +static mdk_personality_t faulty_personality = +{ + .name = "faulty", + .owner = THIS_MODULE, + .make_request = make_request, + .run = run, + .stop = stop, + .status = status, + .reconfig = reconfig, +}; + +static int __init raid_init(void) +{ + return register_md_personality(FAULTY, &faulty_personality); +} + +static void raid_exit(void) +{ + unregister_md_personality(FAULTY); +} + +module_init(raid_init); +module_exit(raid_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("md-personality-10"); /* faulty */ diff --git a/drivers/md/md.c b/drivers/md/md.c index d3950b9edd48..9c258f84376a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2351,16 +2351,27 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) /* mddev->patch_version != info->patch_version || */ mddev->ctime != info->ctime || mddev->level != info->level || - mddev->layout != info->layout || +/* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| mddev->chunk_size != info->chunk_size ) return -EINVAL; /* Check there is only one change */ if (mddev->size != info->size) cnt++; if (mddev->raid_disks != info->raid_disks) cnt++; + if (mddev->layout !=
info->layout) cnt++; if (cnt == 0) return 0; if (cnt > 1) return -EINVAL; + if (mddev->layout != info->layout) { + /* Change layout + * we don't need to do anything at the md level, the + * personality will take care of it all. + */ + if (mddev->pers->reconfig == NULL) + return -EINVAL; + else + return mddev->pers->reconfig(mddev, info->layout, -1); + } if (mddev->size != info->size) { mdk_rdev_t * rdev; struct list_head *tmp; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 945346ec2c10..c9a0d4013be7 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -25,10 +25,12 @@ #define MULTIPATH 7UL #define RAID6 8UL #define RAID10 9UL -#define MAX_PERSONALITY 10UL +#define FAULTY 10UL +#define MAX_PERSONALITY 11UL #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) #define MaxSector (~(sector_t)0) #define MD_THREAD_NAME_MAX 14 @@ -36,6 +38,7 @@ static inline int pers_to_level (int pers) { switch (pers) { + case FAULTY: return LEVEL_FAULTY; case MULTIPATH: return LEVEL_MULTIPATH; case HSM: return -3; case TRANSLUCENT: return -2; @@ -53,6 +56,7 @@ static inline int pers_to_level (int pers) static inline int level_to_pers (int level) { switch (level) { + case LEVEL_FAULTY: return FAULTY; case LEVEL_MULTIPATH: return MULTIPATH; case -3: return HSM; case -2: return TRANSLUCENT; @@ -290,6 +294,7 @@ struct mdk_personality_s int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster); int (*resize) (mddev_t *mddev, sector_t sectors); int (*reshape) (mddev_t *mddev, int raid_disks); + int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); }; -- cgit v1.2.3 From 2ea6f6859bc53fcbc200c9fd5fb036fda02ee519 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 10 Nov 2004 21:50:41 -0800 Subject: [PATCH] make cdev_get static, unexport - cdev_get is only used in fs/char_dev.c; move it up, make it static and unexport it. 
- cdev_put is used in one more place (fs/file_table.c) but never in modules; unexport it. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/char_dev.c | 44 ++++++++++++++++++++++---------------------- include/linux/cdev.h | 2 -- 2 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/char_dev.c b/fs/char_dev.c index 00993daec453..51e6461854ab 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -249,6 +249,28 @@ int unregister_chrdev(unsigned int major, const char *name) } static spinlock_t cdev_lock = SPIN_LOCK_UNLOCKED; + +static struct kobject *cdev_get(struct cdev *p) +{ + struct module *owner = p->owner; + struct kobject *kobj; + + if (owner && !try_module_get(owner)) + return NULL; + kobj = kobject_get(&p->kobj); + if (!kobj) + module_put(owner); + return kobj; +} + +void cdev_put(struct cdev *p) +{ + if (p) { + kobject_put(&p->kobj); + module_put(p->owner); + } +} + /* * Called every time a character special file is opened */ @@ -357,26 +379,6 @@ void cdev_del(struct cdev *p) kobject_put(&p->kobj); } -struct kobject *cdev_get(struct cdev *p) -{ - struct module *owner = p->owner; - struct kobject *kobj; - - if (owner && !try_module_get(owner)) - return NULL; - kobj = kobject_get(&p->kobj); - if (!kobj) - module_put(owner); - return kobj; -} - -void cdev_put(struct cdev *p) -{ - if (p) { - kobject_put(&p->kobj); - module_put(p->owner); - } -} static decl_subsys(cdev, NULL, NULL); @@ -447,8 +449,6 @@ EXPORT_SYMBOL(unregister_chrdev_region); EXPORT_SYMBOL(alloc_chrdev_region); EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); -EXPORT_SYMBOL(cdev_get); -EXPORT_SYMBOL(cdev_put); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(register_chrdev); diff --git a/include/linux/cdev.h b/include/linux/cdev.h index f1996ec09e96..8da37e29cb87 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -17,8 +17,6 @@ struct cdev *cdev_alloc(void); void cdev_put(struct 
cdev *p); -struct kobject *cdev_get(struct cdev *); - int cdev_add(struct cdev *, dev_t, unsigned); void cdev_del(struct cdev *); -- cgit v1.2.3 From 2323f9d42b928f4c804e4926a7a829e81fe4502f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 10 Nov 2004 22:56:17 -0800 Subject: Revert recent EDD changes to use EXTENDED READ command and CONFIG_EDD_SKIP_MBR It seems to result in unexplained memory corruption. Matt is working on it. Cset exclude: Matt_Domsch@dell.com[torvalds]|ChangeSet|20041020153622|50713 --- arch/i386/boot/edd.S | 126 +++++++---------------------------------------- drivers/firmware/Kconfig | 11 ----- include/linux/edd.h | 4 -- 3 files changed, 18 insertions(+), 123 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index 61af26a4bee6..889790693bcd 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -7,66 +7,20 @@ * and Andrew Wilks September 2003, June 2004 * legacy CHS retreival by Patrick J. LoPresti * March 2004 - * Use EXTENDED READ calls if possible, Matt Domsch, October 2004 */ #include <linux/edd.h> #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) +# Read the first sector of each BIOS disk device and store the 4-byte signature +edd_mbr_sig_start: movb $0, (EDD_MBR_SIG_NR_BUF) # zero value at EDD_MBR_SIG_NR_BUF -#ifndef CONFIG_EDD_SKIP_MBR - xorl %edx, %edx movb $0x80, %dl # from device 80 - -edd_mbr_check_ext: - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - movb $CHECKEXTENSIONSPRESENT, %ah # Function 41 - movw $EDDMAGIC1, %bx # magic - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes - int $0x13 # make the call - sti # work around buggy BIOSes - popw %dx - jc edd_start # no more BIOS devices - cmpw $EDDMAGIC2, %bx # is magic right? - jne edd_mbr_sig_next # nope, next... - testw $FIXEDDISKSUBSET, %cx # EXTENDED READ supported?
- jz edd_mbr_read_sectors # nope, use READ SECTORS - -edd_mbr_extended_read: -# Fill out the device address packet here, make the fn42 call - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - subw $EDD_DEV_ADDR_PACKET_LEN, %sp # put packet on stack - pushw %si - movw %sp, %si - movl $0, (%si) # zero out packet - movl $0, 4(%si) - movl $0, 8(%si) - movl $0, 12(%si) - movb $EDD_DEV_ADDR_PACKET_LEN, (%si) # length of packet - movb $1, 2(%si) # move 1 sector - movw $EDDBUF, 4(%si) # into EDDBUF - movw %ds, 6(%si) # EDDBUF seg is ds - movb $EXTENDEDREAD, %ah - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes - int $0x13 - sti # work around buggy BIOSes - popw %dx - popw %si - addw $EDD_DEV_ADDR_PACKET_LEN, %sp # remove packet from stack - jnc edd_mbr_store_sig - # otherwise, fall through to the legacy read function - -edd_mbr_read_sectors: -# Read the first sector of each BIOS disk device and store the 4-byte signature - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx + movw $EDD_MBR_SIG_BUF, %bx # store buffer ptr in bx +edd_mbr_sig_read: + movl $0xFFFFFFFF, %eax + movl %eax, (%bx) # assume failure + pushw %bx movb $READ_SECTORS, %ah movb $1, %al # read 1 sector movb $0, %dh # at head 0 @@ -74,31 +28,23 @@ edd_mbr_read_sectors: pushw %es pushw %ds popw %es - movw $EDDBUF, %bx # disk's data goes into EDDBUF - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes + movw $EDDBUF, %bx # disk's data goes into EDDBUF + pushw %dx # work around buggy BIOSes + stc # work around buggy BIOSes int $0x13 - sti # work around buggy BIOSes + sti # work around buggy BIOSes popw %dx popw %es + popw %bx jc edd_mbr_sig_done # on failure, we're done. 
- -edd_mbr_store_sig: - xorl %ebx, %ebx # clear ebx - movb %dl, %bl # copy drive number to ebx - sub $0x80, %bl # subtract 80h from drive number - shlw $2, %bx # multiply by 4 - addw $EDD_MBR_SIG_BUF, %bx # add to sig_buf - # bx now points to the right sig slot movl (EDDBUF+EDD_MBR_SIG_OFFSET), %eax # read sig out of the MBR movl %eax, (%bx) # store success incb (EDD_MBR_SIG_NR_BUF) # note that we stored something -edd_mbr_sig_next: incb %dl # increment to next device + addw $4, %bx # increment sig buffer ptr cmpb $EDD_MBR_SIG_MAX, (EDD_MBR_SIG_NR_BUF) # Out of space? - jb edd_mbr_check_ext # keep looping + jb edd_mbr_sig_read # keep looping edd_mbr_sig_done: -#endif # Do the BIOS Enhanced Disk Drive calls # This consists of two calls: @@ -131,35 +77,12 @@ edd_start: movw $EDDBUF+EDDEXTSIZE, %si # in ds:si, fn41 results # kept just before that movb $0, (EDDNR) # zero value at EDDNR - xorl %edx, %edx movb $0x80, %dl # BIOS device 0x80 edd_check_ext: - pushw %di # zero out this edd_info block - pushw %es - movw %ds, %ax - movw %ax, %es - movw %si, %ax - subw $EDDEXTSIZE, %ax - movw %ax, %di - movl $EDDEXTSIZE+EDDPARMSIZE, %ecx - xorl %eax, %eax - cld - rep - stosb - popw %es - popw %di - - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx movb $CHECKEXTENSIONSPRESENT, %ah # Function 41 movw $EDDMAGIC1, %bx # magic - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes int $0x13 # make the call - sti # work around buggy BIOSes - popw %dx jc edd_done # no more BIOS devices cmpw $EDDMAGIC2, %bx # is magic right? 
@@ -170,38 +93,25 @@ edd_check_ext: movw %cx, %ds:-6(%si) # store extensions incb (EDDNR) # note that we stored something - testw $GET_DEVICE_PARAMETERS_SUPPORTED, %cx - jz edd_get_legacy_chs # nope, skip fn48 - edd_get_device_params: - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx movw $EDDPARMSIZE, %ds:(%si) # put size + movw $0x0, %ds:2(%si) # work around buggy BIOSes movb $GETDEVICEPARAMETERS, %ah # Function 48 - pushw %dx # work around buggy BIOSes - stc # work around buggy BIOSes int $0x13 # make the call - sti # work around buggy BIOSes - popw %dx # Don't check for fail return # it doesn't matter. edd_get_legacy_chs: - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx + xorw %ax, %ax movw %ax, %ds:-4(%si) movw %ax, %ds:-2(%si) - # Ralf Brown's Interrupt List says to set ES:DI to + # Ralf Brown's Interrupt List says to set ES:DI to # 0000h:0000h "to guard against BIOS bugs" - pushw %es + pushw %es movw %ax, %es movw %ax, %di pushw %dx # legacy call clobbers %dl movb $LEGACYGETDEVICEPARAMETERS, %ah # Function 08 - stc # work around buggy BIOSes int $0x13 # make the call - sti # work around buggy BIOSes jc edd_legacy_done # failed movb %cl, %al # Low 6 bits are max andb $0x3F, %al # sector number diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index b88322c91528..4607ddd4693e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -18,17 +18,6 @@ config EDD obscure configurations. Most disk controller BIOS vendors do not yet implement this feature. -config EDD_SKIP_MBR - bool "EDD: Skip Master Boot Record read" - depends on EDD - default n - help - Most controller BIOSs properly implement real-mode legacy - READ SECTORS commands. A few don't. - - If your controller hangs during the kernel's real-mode - startup routine, say Y here. 
- config EFI_VARS tristate "EFI Variable Support via sysfs" depends on EFI diff --git a/include/linux/edd.h b/include/linux/edd.h index 242600817a8d..5f93881106fa 100644 --- a/include/linux/edd.h +++ b/include/linux/edd.h @@ -37,18 +37,14 @@ #define EDDEXTSIZE 8 /* change these if you muck with the structures */ #define EDDPARMSIZE 74 #define CHECKEXTENSIONSPRESENT 0x41 -#define EXTENDEDREAD 0x42 #define GETDEVICEPARAMETERS 0x48 #define LEGACYGETDEVICEPARAMETERS 0x08 #define EDDMAGIC1 0x55AA #define EDDMAGIC2 0xAA55 -#define FIXEDDISKSUBSET 0x0001 -#define GET_DEVICE_PARAMETERS_SUPPORTED 0x0007 #define READ_SECTORS 0x02 /* int13 AH=0x02 is READ_SECTORS command */ #define EDD_MBR_SIG_OFFSET 0x1B8 /* offset of signature in the MBR */ -#define EDD_DEV_ADDR_PACKET_LEN 0x10 /* for int13 fn42 */ #define EDD_MBR_SIG_BUF 0x290 /* addr in boot params */ #define EDD_MBR_SIG_MAX 16 /* max number of signatures to store */ #define EDD_MBR_SIG_NR_BUF 0x1ea /* addr of number of MBR signtaures at EDD_MBR_SIG_BUF -- cgit v1.2.3