From 68d9807c087b3dd14d6e4f46488285ae97cddbd7 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 20 Aug 2002 19:02:22 +0400 Subject: Add displaying of more reiserfs statistical info through /proc interface, by Nikita Danilov --- include/linux/reiserfs_fs_sb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 534d8e20bdb7..13632da04604 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -270,6 +270,12 @@ typedef struct reiserfs_proc_info_data stat_cnt_t search_by_key_fs_changed; stat_cnt_t search_by_key_restarted; + stat_cnt_t insert_item_restarted; + stat_cnt_t paste_into_item_restarted; + stat_cnt_t cut_from_item_restarted; + stat_cnt_t delete_solid_item_restarted; + stat_cnt_t delete_item_restarted; + stat_cnt_t leaked_oid; stat_cnt_t leaves_removable; -- cgit v1.2.3 From 7924d769843c77bb02066e1335f06b0a2a4e02cb Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 20 Aug 2002 19:20:02 +0400 Subject: Implemented new block allocator for reiserfs, new tail policy for smaller tails. New block allocator contains code from Alexander Zarochencev, Jeff Mahoney and Oleg Drokin. 
--- fs/reiserfs/bitmap.c | 1338 +++++++++++++++++++++++----------------- fs/reiserfs/fix_node.c | 4 +- fs/reiserfs/hashes.c | 10 +- fs/reiserfs/inode.c | 72 +-- fs/reiserfs/journal.c | 13 +- fs/reiserfs/namei.c | 4 + fs/reiserfs/procfs.c | 33 +- fs/reiserfs/resize.c | 48 +- fs/reiserfs/stree.c | 14 +- fs/reiserfs/super.c | 149 ++++- include/linux/reiserfs_fs.h | 108 +++- include/linux/reiserfs_fs_i.h | 4 + include/linux/reiserfs_fs_sb.h | 39 +- 13 files changed, 1113 insertions(+), 723 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 50ce0a8f654b..0fc0760e9667 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1,26 +1,63 @@ /* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ +/* Reiserfs block (de)allocator, bitmap-based. */ #include #include #include -#include -#include +#include #include +#include +#include +#include +#include + +#define PREALLOCATION_SIZE 9 + +/* different reiserfs block allocator options */ + +#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits) + +#define _ALLOC_concentrating_formatted_nodes 0 +#define _ALLOC_displacing_large_files 1 +#define _ALLOC_displacing_new_packing_localities 2 +#define _ALLOC_old_hashed_relocation 3 +#define _ALLOC_new_hashed_relocation 4 +#define _ALLOC_skip_busy 5 +#define _ALLOC_displace_based_on_dirid 6 +#define _ALLOC_hashed_formatted_nodes 7 +#define _ALLOC_old_way 8 +#define _ALLOC_hundredth_slices 9 + +#define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s)) +#define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s)) +#define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s)) + +#define SET_OPTION(optname) \ + do { \ + reiserfs_warning("reiserfs: option \"%s\" is set\n", #optname); \ + set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ + } while(0) +#define 
TEST_OPTION(optname, s) \ + test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) + +static inline void get_bit_address (struct super_block * s, + unsigned long block, int * bmap_nr, int * offset) +{ + /* It is in the bitmap block number equal to the block + * number divided by the number of bits in a block. */ + *bmap_nr = block / (s->s_blocksize << 3); + /* Within that bitmap block it is located at bit offset *offset. */ + *offset = block & ((s->s_blocksize << 3) - 1 ); + return; +} #ifdef CONFIG_REISERFS_CHECK - -/* this is a safety check to make sure -** blocks are reused properly. used for debugging only. -** -** this checks, that block can be reused, and it has correct state -** (free or busy) -*/ int is_reusable (struct super_block * s, unsigned long block, int bit_value) { int i, j; - + if (block == 0 || block >= SB_BLOCK_COUNT (s)) { reiserfs_warning ("vs-4010: is_reusable: block number is out of range %lu (%u)\n", block, SB_BLOCK_COUNT (s)); @@ -29,104 +66,269 @@ int is_reusable (struct super_block * s, unsigned long block, int bit_value) /* it can't be one of the bitmap blocks */ for (i = 0; i < SB_BMAP_NR (s); i ++) - if (block == SB_AP_BITMAP (s)[i]->b_blocknr) { + if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) { reiserfs_warning ("vs: 4020: is_reusable: " "bitmap block %lu(%u) can't be freed or reused\n", block, SB_BMAP_NR (s)); return 0; } - i = block / (s->s_blocksize << 3); + get_bit_address (s, block, &i, &j); + if (i >= SB_BMAP_NR (s)) { reiserfs_warning ("vs-4030: is_reusable: there is no so many bitmap blocks: " "block=%lu, bitmap_nr=%d\n", block, i); return 0; } - j = block % (s->s_blocksize << 3); if ((bit_value == 0 && - reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i]->b_data)) || + reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) || (bit_value == 1 && - reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data) == 0)) { + reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) { reiserfs_warning ("vs-4040: is_reusable: corresponding bit of 
block %lu does not " "match required value (i==%d, j==%d) test_bit==%d\n", - block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)); + block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data)); + return 0; } if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { reiserfs_warning ("vs-4050: is_reusable: this is root block (%u), " - "it must be busy", SB_ROOT_BLOCK (s)); + "it must be busy\n", SB_ROOT_BLOCK (s)); return 0; } return 1; } +#endif /* CONFIG_REISERFS_CHECK */ +/* searches in journal structures for a given block number (bmap, off). If block + is found in reiserfs journal it suggests next free block candidate to test. */ +static inline int is_block_in_journal (struct super_block * s, int bmap, int +off, int *next) +{ + unsigned long tmp; + if (reiserfs_in_journal (s, bmap, off, 1, &tmp)) { + if (tmp) { /* hint supplied */ + *next = tmp; + PROC_INFO_INC( s, scan_bitmap.in_journal_hint ); + } else { + (*next) = off + 1; /* inc offset to avoid looping. */ + PROC_INFO_INC( s, scan_bitmap.in_journal_nohint ); + } + PROC_INFO_INC( s, scan_bitmap.retry ); + return 1; + } + return 0; +} + +/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap + * block; */ +static int scan_bitmap_block (struct reiserfs_transaction_handle *th, + int bmap_n, int *beg, int boundary, int min, int max, int unfm) +{ + struct super_block *s = th->t_super; + struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n]; + int end, next; + int org = *beg; + + RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)\n",bmap_n, SB_BMAP_NR (s) - 1); + PROC_INFO_INC( s, scan_bitmap.bmap ); +/* this is unclear and lacks comments, explain how journal bitmaps + work here for the reader. Convey a sense of the design here. What + is a window? */ +/* - I mean `a window of zero bits' as in description of this function - Zam. 
*/ + + if ( !bi ) { + printk("Hey, bitmap info pointer is zero for bitmap %d!\n",bmap_n); + return 0; + } + if (buffer_locked (bi->bh)) { + PROC_INFO_INC( s, scan_bitmap.wait ); + __wait_on_buffer (bi->bh); + } + /* If we know that first zero bit is only one or first zero bit is + closer to the end of bitmap than our start pointer */ + if (bi->first_zero_hint > *beg || bi->free_count == 1) + *beg = bi->first_zero_hint; -#endif /* CONFIG_REISERFS_CHECK */ + while (1) { + cont: + if (bi->free_count < min) + return 0; // No free blocks in this bitmap + + /* search for a first zero bit -- beggining of a window */ + *beg = reiserfs_find_next_zero_le_bit + ((unsigned long*)(bi->bh->b_data), boundary, *beg); + + if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block + * cannot contain a zero window of minimum size */ + return 0; + } -/* get address of corresponding bit (bitmap block number and offset in it) */ -static inline void get_bit_address (struct super_block * s, unsigned long block, int * bmap_nr, int * offset) + if (unfm && is_block_in_journal(s,bmap_n, *beg, beg)) + continue; + /* first zero bit found; we check next bits */ + for (end = *beg + 1;; end ++) { + if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) { + next = end; + break; + } + /* finding the other end of zero bit window requires looking into journal structures (in + * case of searching for free blocks for unformatted nodes) */ + if (unfm && is_block_in_journal(s, bmap_n, end, &next)) + break; + } + + /* now (*beg) points to beginning of zero bits window, + * (end) points to one bit after the window end */ + if (end - *beg >= min) { /* it seems we have found window of proper size */ + int i; + reiserfs_prepare_for_journal (s, bi->bh, 1); + /* try to set all blocks used checking are they still free */ + for (i = *beg; i < end; i++) { + /* It seems that we should not check in journal again. 
*/ + if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) { + /* bit was set by another process + * while we slept in prepare_for_journal() */ + PROC_INFO_INC( s, scan_bitmap.stolen ); + if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, + * if length of this set is more or equal to `min' */ + end = i; + break; + } + /* otherwise we clear all bit were set ... */ + while (--i >= *beg) + reiserfs_test_and_clear_le_bit (i, bi->bh->b_data); + reiserfs_restore_prepared_buffer (s, bi->bh); + *beg = max(org, (int)bi->first_zero_hint); + /* ... and search again in current block from beginning */ + goto cont; + } + } + bi->free_count -= (end - *beg); + + /* if search started from zero_hint bit, and zero hint have not + changed since, then we need to update first_zero_hint */ + if ( bi->first_zero_hint >= *beg) + /* no point in looking for free bit if there is not any */ + bi->first_zero_hint = (bi->free_count > 0 ) ? + reiserfs_find_next_zero_le_bit + ((unsigned long*)(bi->bh->b_data), s->s_blocksize << 3, end) : (s->s_blocksize << 3); + + journal_mark_dirty (th, s, bi->bh); + + /* free block count calculation */ + reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1); + PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); + journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s)); + + return end - (*beg); + } else { + *beg = next; + } + } + } + +/* Tries to find contiguous zero bit window (given size) in given region of + * bitmap and place new blocks there. Returns number of allocated blocks. */ +static int scan_bitmap (struct reiserfs_transaction_handle *th, + unsigned long *start, unsigned long finish, + int min, int max, int unfm, unsigned long file_block) { - /* It is in the bitmap block number equal to the block number divided by the number of - bits in a block. */ - *bmap_nr = block / (s->s_blocksize << 3); - /* Within that bitmap block it is located at bit offset *offset. 
*/ - *offset = block % (s->s_blocksize << 3); - return; -} + int nr_allocated=0; + struct super_block * s = th->t_super; + /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr + * - Hans, it is not a block number - Zam. */ + + int bm, off; + int end_bm, end_off; + int off_max = s->s_blocksize << 3; + + PROC_INFO_INC( s, scan_bitmap.call ); + if ( SB_FREE_BLOCKS(s) <= 0) + return 0; // No point in looking for more free blocks + + get_bit_address (s, *start, &bm, &off); + get_bit_address (s, finish, &end_bm, &end_off); + + // With this option set first we try to find a bitmap that is at least 10% + // free, and if that fails, then we fall back to old whole bitmap scanning + if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) { + for (;bm < end_bm; bm++, off = 0) { + if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 ) + nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); + if (nr_allocated) + goto ret; + } + get_bit_address (s, *start, &bm, &off); + } + + for (;bm < end_bm; bm++, off = 0) { + nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); + if (nr_allocated) + goto ret; + } + nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); + + ret: + *start = bm * off_max + off; + return nr_allocated; + +} -/* There would be a modest performance benefit if we write a version - to free a list of blocks at once. -Hans */ - /* I wonder if it would be less modest - now that we use journaling. 
-Hans */ -static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long block) +static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, + unsigned long block) { struct super_block * s = th->t_super; struct reiserfs_super_block * rs; struct buffer_head * sbh; - struct buffer_head ** apbh; + struct reiserfs_bitmap_info *apbi; int nr, offset; - PROC_INFO_INC( s, free_block ); + PROC_INFO_INC( s, free_block ); - rs = SB_DISK_SUPER_BLOCK (s); - sbh = SB_BUFFER_WITH_SB (s); - apbh = SB_AP_BITMAP (s); + rs = SB_DISK_SUPER_BLOCK (s); + sbh = SB_BUFFER_WITH_SB (s); + apbi = SB_AP_BITMAP(s); - get_bit_address (s, block, &nr, &offset); + get_bit_address (s, block, &nr, &offset); - if (nr >= sb_bmap_nr (rs)) { - reiserfs_warning ("vs-4075: reiserfs_free_block: " - "block %lu is out of range on %s\n", - block, reiserfs_bdevname (s)); - return; - } + if (nr >= sb_bmap_nr (rs)) { + reiserfs_warning ("vs-4075: reiserfs_free_block: " + "block %lu is out of range on %s\n", + block, reiserfs_bdevname (s)); + return; + } - reiserfs_prepare_for_journal(s, apbh[nr], 1 ) ; + reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ; - /* clear bit for the given block in bit map */ - if (!reiserfs_test_and_clear_le_bit (offset, apbh[nr]->b_data)) { - reiserfs_warning ("vs-4080: reiserfs_free_block: " - "free_block (%s:%lu)[dev:blocknr]: bit already cleared\n", - reiserfs_bdevname (s), block); - } - journal_mark_dirty (th, s, apbh[nr]); + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) { + reiserfs_warning ("vs-4080: reiserfs_free_block: " + "free_block (%s:%lu)[dev:blocknr]: bit already cleared\n", + reiserfs_bdevname (s), block); + } + if (offset < apbi[nr].first_zero_hint) { + apbi[nr].first_zero_hint = offset; + } + apbi[nr].free_count ++; + journal_mark_dirty (th, s, apbi[nr].bh); - reiserfs_prepare_for_journal(s, sbh, 1) ; - /* update super block */ - set_sb_free_blocks( rs, 
sb_free_blocks(rs) + 1 ); + reiserfs_prepare_for_journal(s, sbh, 1) ; + /* update super block */ + set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); - journal_mark_dirty (th, s, sbh); + journal_mark_dirty (th, s, sbh); s->s_dirt = 1; } void reiserfs_free_block (struct reiserfs_transaction_handle *th, - unsigned long block) { + unsigned long block) +{ struct super_block * s = th->t_super; RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); @@ -144,571 +346,557 @@ void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, _reiserfs_free_block(th, block) ; } -/* beginning from offset-th bit in bmap_nr-th bitmap block, - find_forward finds the closest zero bit. It returns 1 and zero - bit address (bitmap, offset) if zero bit found or 0 if there is no - zero bit in the forward direction */ -/* The function is NOT SCHEDULE-SAFE! */ -static int find_forward (struct super_block * s, int * bmap_nr, int * offset, int for_unformatted) +static void __discard_prealloc (struct reiserfs_transaction_handle * th, + struct reiserfs_inode_info *ei) +{ + unsigned long save = ei->i_prealloc_block ; +#ifdef CONFIG_REISERFS_CHECK + if (ei->i_prealloc_count < 0) + reiserfs_warning("zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ ); +#endif + while (ei->i_prealloc_count > 0) { + reiserfs_free_prealloc_block(th,ei->i_prealloc_block); + ei->i_prealloc_block++; + ei->i_prealloc_count --; + } + ei->i_prealloc_block = save; + list_del_init(&(ei->i_prealloc_list)); +} + +/* FIXME: It should be inline function */ +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode) { - int i, j; - struct buffer_head * bh; - unsigned long block_to_try = 0; - unsigned long next_block_to_try = 0 ; - - PROC_INFO_INC( s, find_forward.call ); - - for (i = *bmap_nr; i < SB_BMAP_NR (s); i ++, *offset = 0, - PROC_INFO_INC( s, find_forward.bmap )) { - /* get corresponding bitmap block */ - bh = SB_AP_BITMAP (s)[i]; - if 
(buffer_locked (bh)) { - PROC_INFO_INC( s, find_forward.wait ); - __wait_on_buffer (bh); + struct reiserfs_inode_info *ei = REISERFS_I(inode); + if (ei->i_prealloc_count) { + __discard_prealloc(th, ei); } -retry: - j = reiserfs_find_next_zero_le_bit ((unsigned long *)bh->b_data, - s->s_blocksize << 3, *offset); - - /* wow, this really needs to be redone. We can't allocate a block if - ** it is in the journal somehow. reiserfs_in_journal makes a suggestion - ** for a good block if the one you ask for is in the journal. Note, - ** reiserfs_in_journal might reject the block it suggests. The big - ** gain from the suggestion is when a big file has been deleted, and - ** many blocks show free in the real bitmap, but are all not free - ** in the journal list bitmaps. - ** - ** this whole system sucks. The bitmaps should reflect exactly what - ** can and can't be allocated, and the journal should update them as - ** it goes. TODO. - */ - if (j < (s->s_blocksize << 3)) { - block_to_try = (i * (s->s_blocksize << 3)) + j; - - /* the block is not in the journal, we can proceed */ - if (!(reiserfs_in_journal(s, block_to_try, for_unformatted, &next_block_to_try))) { - *bmap_nr = i; - *offset = j; - return 1; - } - /* the block is in the journal */ - else if ((j+1) < (s->s_blocksize << 3)) { /* try again */ - /* reiserfs_in_journal suggested a new block to try */ - if (next_block_to_try > 0) { - int new_i ; - get_bit_address (s, next_block_to_try, &new_i, offset); - - PROC_INFO_INC( s, find_forward.in_journal_hint ); - - /* block is not in this bitmap. reset i and continue - ** we only reset i if new_i is in a later bitmap. 
- */ - if (new_i > i) { - i = (new_i - 1 ); /* i gets incremented by the for loop */ - PROC_INFO_INC( s, find_forward.in_journal_out ); - continue ; - } - } else { - /* no suggestion was made, just try the next block */ - *offset = j+1 ; +} + +void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) +{ + struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; + + while (!list_empty(plist)) { + struct reiserfs_inode_info *ei; + ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); +#ifdef CONFIG_REISERFS_CHECK + if (!ei->i_prealloc_count) { + reiserfs_warning("zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__); } - PROC_INFO_INC( s, find_forward.retry ); - goto retry ; - } +#endif + __discard_prealloc(th, ei); } - } - /* zero bit not found */ - return 0; } - -/* return 0 if no free blocks, else return 1 */ -/* The function is NOT SCHEDULE-SAFE! -** because the bitmap block we want to change could be locked, and on its -** way to the disk when we want to read it, and because of the -** flush_async_commits. Per bitmap block locks won't help much, and -** really aren't needed, as we retry later on if we try to set the bit -** and it is already set. -*/ -static int find_zero_bit_in_bitmap (struct super_block * s, - unsigned long search_start, - int * bmap_nr, int * offset, - int for_unformatted) +/* block allocator related options are parsed here */ +int reiserfs_parse_alloc_options(struct super_block * s, char * options) { - int retry_count = 0 ; - /* get bit location (bitmap number and bit offset) of search_start block */ - get_bit_address (s, search_start, bmap_nr, offset); - - /* note that we search forward in the bitmap, benchmarks have shown that it is better to allocate in increasing - sequence, which is probably due to the disk spinning in the forward direction.. 
*/ - if (find_forward (s, bmap_nr, offset, for_unformatted) == 0) { - /* there wasn't a free block with number greater than our - starting point, so we are going to go to the beginning of the disk */ - -retry: - search_start = 0; /* caller will reset search_start for itself also. */ - get_bit_address (s, search_start, bmap_nr, offset); - if (find_forward (s, bmap_nr,offset,for_unformatted) == 0) { - if (for_unformatted) { /* why only unformatted nodes? -Hans */ - if (retry_count == 0) { - /* we've got a chance that flushing async commits will free up - ** some space. Sync then retry - */ - flush_async_commits(s) ; - retry_count++ ; - goto retry ; - } else if (retry_count > 0) { - /* nothing more we can do. Make the others wait, flush - ** all log blocks to disk, and flush to their home locations. - ** this will free up any blocks held by the journal - */ - SB_JOURNAL(s)->j_must_wait = 1 ; - } + char * this_char, * value; + + REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ + + for (this_char = strsep (&options, ":"); this_char != NULL; ) { + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, "concentrating_formatted_nodes")) { + int temp; + SET_OPTION(concentrating_formatted_nodes); + temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10; + if (temp <= 0 || temp > 100) { + REISERFS_SB(s)->s_alloc_options.border = 10; + } else { + REISERFS_SB(s)->s_alloc_options.border = 100 / temp; + } + continue; + } + if (!strcmp(this_char, "displacing_large_files")) { + SET_OPTION(displacing_large_files); + REISERFS_SB(s)->s_alloc_options.large_file_size = + (value && *value) ? 
simple_strtoul (value, &value, 0) : 16; + continue; + } + if (!strcmp(this_char, "displacing_new_packing_localities")) { + SET_OPTION(displacing_new_packing_localities); + continue; + }; + + if (!strcmp(this_char, "old_hashed_relocation")) { + SET_OPTION(old_hashed_relocation); + continue; } - return 0; + + if (!strcmp(this_char, "new_hashed_relocation")) { + SET_OPTION(new_hashed_relocation); + continue; + } + + if (!strcmp(this_char, "hashed_formatted_nodes")) { + SET_OPTION(hashed_formatted_nodes); + continue; + } + + if (!strcmp(this_char, "skip_busy")) { + SET_OPTION(skip_busy); + continue; + } + + if (!strcmp(this_char, "hundredth_slices")) { + SET_OPTION(hundredth_slices); + continue; + } + + if (!strcmp(this_char, "old_way")) { + SET_OPTION(old_way); + continue; + } + + if (!strcmp(this_char, "displace_based_on_dirid")) { + SET_OPTION(displace_based_on_dirid); + continue; + } + + if (!strcmp(this_char, "preallocmin")) { + REISERFS_SB(s)->s_alloc_options.preallocmin = + (value && *value) ? simple_strtoul (value, &value, 0) : 4; + continue; + } + + if (!strcmp(this_char, "preallocsize")) { + REISERFS_SB(s)->s_alloc_options.preallocsize = + (value && *value) ? 
simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE; + continue; + } + + reiserfs_warning("zam-4001: %s : unknown option - %s\n", __FUNCTION__ , this_char); + return 1; } - } - return 1; + + return 0; +} + +static void inline new_hashed_relocation (reiserfs_blocknr_hint_t * hint) +{ + char * hash_in; + if (hint->formatted_node) { + hash_in = (char*)&hint->key.k_dir_id; + } else { + if (!hint->inode) { + //hint->search_start = hint->beg; + hash_in = (char*)&hint->key.k_dir_id; + } else + if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); + else + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); + } + + hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); } -/* get amount_needed free block numbers from scanning the bitmap of - free/used blocks. - - Optimize layout by trying to find them starting from search_start - and moving in increasing blocknr direction. (This was found to be - faster than using a bi-directional elevator_direction, in part - because of disk spin direction, in part because by the time one - reaches the end of the disk the beginning of the disk is the least - congested). - - search_start is the block number of the left - semantic neighbor of the node we create. - - return CARRY_ON if everything is ok - return NO_DISK_SPACE if out of disk space - return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS if the block we found is not contiguous to the last one - - return block numbers found, in the array free_blocknrs. assumes - that any non-zero entries already present in the array are valid. - This feature is perhaps convenient coding when one might not have - used all blocknrs from the last time one called this function, or - perhaps it is an archaism from the days of schedule tracking, one - of us ought to reread the code that calls this, and analyze whether - it is still the right way to code it. - - spare space is used only when priority is set to 1. 
reiserfsck has - its own reiserfs_new_blocknrs, which can use reserved space - - exactly what reserved space? the SPARE_SPACE? if so, please comment reiserfs.h. - - Give example of who uses spare space, and say that it is a deadlock - avoidance mechanism. -Hans */ - -/* This function is NOT SCHEDULE-SAFE! */ - -static int do_reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, - unsigned long * free_blocknrs, - unsigned long search_start, - int amount_needed, int priority, - int for_unformatted, - int for_prealloc) +static void inline get_left_neighbor(reiserfs_blocknr_hint_t *hint) { - struct super_block * s = th->t_super; - int i, j; - unsigned long * block_list_start = free_blocknrs; - int init_amount_needed = amount_needed; - unsigned long new_block = 0 ; - - if (SB_FREE_BLOCKS (s) < SPARE_SPACE && !priority) - /* we can answer NO_DISK_SPACE being asked for new block with - priority 0 */ - return NO_DISK_SPACE; - - RFALSE( !s, "vs-4090: trying to get new block from nonexistent device"); - RFALSE( search_start == MAX_B_NUM, - "vs-4100: we are optimizing location based on " - "the bogus location of a temp buffer (%lu).", search_start); - RFALSE( amount_needed < 1 || amount_needed > 2, - "vs-4110: amount_needed parameter incorrect (%d)", amount_needed); - - /* We continue the while loop if another process snatches our found - * free block from us after we find it but before we successfully - * mark it as in use */ - - while (amount_needed--) { - /* skip over any blocknrs already gotten last time. */ - if (*(free_blocknrs) != 0) { - RFALSE( is_reusable (s, *free_blocknrs, 1) == 0, - "vs-4120: bad blocknr on free_blocknrs list"); - free_blocknrs++; - continue; - } - /* look for zero bits in bitmap */ - if (find_zero_bit_in_bitmap(s,search_start, &i, &j,for_unformatted) == 0) { - if (find_zero_bit_in_bitmap(s,search_start,&i,&j, for_unformatted) == 0) { - /* recode without the goto and without - the if. It will require a - duplicate for. 
This is worth the - code clarity. Your way was - admirable, and just a bit too - clever in saving instructions.:-) - I'd say create a new function, but - that would slow things also, yes? - -Hans */ -free_and_return: - for ( ; block_list_start != free_blocknrs; block_list_start++) { - reiserfs_free_block (th, *block_list_start); - *block_list_start = 0; + struct path * path; + struct buffer_head * bh; + struct item_head * ih; + int pos_in_item; + __u32 * item; + + if (!hint->path) /* reiserfs code can call this function w/o pointer to path + * structure supplied; then we rely on supplied search_start */ + return; + + path = hint->path; + bh = get_last_bh(path); + RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor\n"); + ih = get_ih(path); + pos_in_item = path->pos_in_item; + item = get_item (path); + + hint->search_start = bh->b_blocknr; + + if (!hint->formatted_node && is_indirect_le_ih (ih)) { + /* for indirect item: go to left and look for the first non-hole entry + in the indirect item */ + if (pos_in_item == I_UNFM_NUM (ih)) + pos_in_item--; +// pos_in_item = I_UNFM_NUM (ih) - 1; + while (pos_in_item >= 0) { + int t=get_block_num(item,pos_in_item); + if (t) { + hint->search_start = t; + break; + } + pos_in_item --; } - if (for_prealloc) - return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS; - else - return NO_DISK_SPACE; + } else { } - } - - /* i and j now contain the results of the search. i = bitmap block - number containing free block, j = offset in this block. we - compute the blocknr which is our result, store it in - free_blocknrs, and increment the pointer so that on the next - loop we will insert into the next location in the array. Also - in preparation for the next loop, search_start is changed so - that the next search will not rescan the same range but will - start where this search finished. 
Note that while it is - possible that schedule has occurred and blocks have been freed - in that range, it is perhaps more important that the blocks - returned be near each other than that they be near their other - neighbors, and it also simplifies and speeds the code this way. */ - - /* journal: we need to make sure the block we are giving out is not - ** a log block, horrible things would happen there. - */ - new_block = (i * (s->s_blocksize << 3)) + j; - if (for_prealloc && (new_block - 1) != search_start) { - /* preallocated blocks must be contiguous, bail if we didnt find one. - ** this is not a bug. We want to do the check here, before the - ** bitmap block is prepared, and before we set the bit and log the - ** bitmap. - ** - ** If we do the check after this function returns, we have to - ** call reiserfs_free_block for new_block, which would be pure - ** overhead. - ** - ** for_prealloc should only be set if the caller can deal with the - ** NO_MORE_UNUSED_CONTIGUOUS_BLOCKS return value. This can be - ** returned before the disk is actually full - */ - goto free_and_return ; - } - search_start = new_block ; + /* does result value fit into specified region? */ + return; +} - /* make sure the block is not of journal or reserved area */ - if (is_block_in_log_or_reserved_area(s, search_start)) { - reiserfs_warning("vs-4130: reiserfs_new_blocknrs: trying to allocate log block %lu\n", - search_start) ; - search_start++ ; - amount_needed++ ; - continue ; - } - - - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; - - RFALSE( buffer_locked (SB_AP_BITMAP (s)[i]) || - is_reusable (s, search_start, 0) == 0, - "vs-4140: bitmap block is locked or bad block number found"); - - /* if this bit was already set, we've scheduled, and someone else - ** has allocated it. 
loop around and try again - */ - if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) { - reiserfs_warning("vs-4150: reiserfs_new_blocknrs, block not free"); - reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ; - amount_needed++ ; - continue ; - } - journal_mark_dirty (th, s, SB_AP_BITMAP (s)[i]); - *free_blocknrs = search_start ; - free_blocknrs ++; - } +/* should be, if formatted node, then try to put on first part of the device + specified as number of percent with mount option device, else try to put + on last of device. This is not to say it is good code to do so, + but the effect should be measured. */ +static void inline set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint) +{ + b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; - reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; - /* update free block count in super block */ - PUT_SB_FREE_BLOCKS( s, SB_FREE_BLOCKS(s) - init_amount_needed ); - journal_mark_dirty (th, s, SB_BUFFER_WITH_SB (s)); - s->s_dirt = 1; + if (hint->formatted_node) + hint->end = border - 1; + else + hint->beg = border; +} - return CARRY_ON; +static void inline displace_large_file(reiserfs_blocknr_hint_t *hint) +{ + if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg); + else + hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg); } -// this is called only by get_empty_nodes -int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, - unsigned long search_start, int amount_needed) { - return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, amount_needed, 0/*priority*/, 0/*for_formatted*/, 0/*for_prealloc */) ; +static void inline hash_formatted_node(reiserfs_blocknr_hint_t *hint) +{ + char * hash_in; + + if 
(!hint->inode) + hash_in = (char*)&hint->key.k_dir_id; + else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); + else + hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); + + hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); } +static int inline this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint) +{ + return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; +} -// called by get_new_buffer and by reiserfs_get_block with amount_needed == 1 -int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, - unsigned long search_start) { - return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, - 1/*amount_needed*/, - 0/*priority*/, - 1/*for formatted*/, - 0/*for prealloc */) ; +#ifdef DISPLACE_NEW_PACKING_LOCALITIES +static void inline displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) +{ + struct key * key = &hint->key; + + hint->th->displace_new_blocks = 0; + hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); } + #endif -#ifdef REISERFS_PREALLOCATE - -/* -** We pre-allocate 8 blocks. Pre-allocation is used for files > 16 KB only. -** This lowers fragmentation on large files by grabbing a contiguous set of -** blocks at once. It also limits the number of times the bitmap block is -** logged by making X number of allocation changes in a single transaction. -** -** We are using a border to divide the disk into two parts. The first part -** is used for tree blocks, which have a very high turnover rate (they -** are constantly allocated then freed) -** -** The second part of the disk is for the unformatted nodes of larger files. -** Putting them away from the tree blocks lowers fragmentation, and makes -** it easier to group files together. 
There are a number of different -** allocation schemes being tried right now, each is documented below. -** -** A great deal of the allocator's speed comes because reiserfs_get_block -** sends us the block number of the last unformatted node in the file. Once -** a given block is allocated past the border, we don't collide with the -** blocks near the search_start again. -** -*/ -int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, - struct inode * p_s_inode, - unsigned long * free_blocknrs, - unsigned long search_start) +static int inline old_hashed_relocation (reiserfs_blocknr_hint_t * hint) { - struct reiserfs_inode_info *ei = REISERFS_I(p_s_inode); - int ret=0, blks_gotten=0; - unsigned long border = 0; - unsigned long bstart = 0; - unsigned long hash_in, hash_out; - unsigned long saved_search_start=search_start; - int allocated[PREALLOCATION_SIZE]; - int blks; - - if (!reiserfs_no_border(th->t_super)) { - /* we default to having the border at the 10% mark of the disk. This - ** is an arbitrary decision and it needs tuning. It also needs a limit - ** to prevent it from taking too much space on huge drives. - */ - bstart = (SB_BLOCK_COUNT(th->t_super) / 10); - } - if (!reiserfs_no_unhashed_relocation(th->t_super)) { - /* this is a very simple first attempt at preventing too much grouping - ** around the border value. Since k_dir_id is never larger than the - ** highest allocated oid, it is far from perfect, and files will tend - ** to be grouped towards the start of the border - */ - border = le32_to_cpu(INODE_PKEY(p_s_inode)->k_dir_id) % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ; - } else if (!reiserfs_hashed_relocation(th->t_super)) { - hash_in = le32_to_cpu((INODE_PKEY(p_s_inode))->k_dir_id); - /* I wonder if the CPU cost of the - hash will obscure the layout - effect? Of course, whether that - effect is good or bad we don't - know.... 
:-) */ - - hash_out = keyed_hash(((char *) (&hash_in)), 4); - border = hash_out % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ; + unsigned long border; + unsigned long hash_in; + + if (hint->formatted_node || hint->inode == NULL) { + return 0; + } + + hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); + border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1); + if (border > hint->search_start) + hint->search_start = border; + + return 1; } - border += bstart ; - allocated[0] = 0 ; /* important. Allows a check later on to see if at - * least one block was allocated. This prevents false - * no disk space returns - */ - - if ( (p_s_inode->i_size < 4 * 4096) || - !(S_ISREG(p_s_inode->i_mode)) ) - { - if ( search_start < border - || ( - /* allow us to test whether it is a - good idea to prevent files from - getting too far away from their - packing locality by some unexpected - means. This might be poor code for - directories whose files total - larger than 1/10th of the disk, and - it might be good code for - suffering from old insertions when the disk - was almost full. */ - /* changed from !reiserfs_test3(th->t_super), which doesn't - ** seem like a good idea. Think about adding blocks to - ** a large file. If you've allocated 10% of the disk - ** in contiguous blocks, you start over at the border value - ** for every new allocation. This throws away all the - ** information sent in about the last block that was allocated - ** in the file. Not a good general case at all. 
- ** -chris - */ - reiserfs_test4(th->t_super) && - (search_start > border + (SB_BLOCK_COUNT(th->t_super) / 10)) - ) - ) - search_start=border; - ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, - 1/*amount_needed*/, - 0/*use reserved blocks for root */, - 1/*for_formatted*/, - 0/*for prealloc */) ; - return ret; +static int inline old_way (reiserfs_blocknr_hint_t * hint) +{ + unsigned long border; + + if (hint->formatted_node || hint->inode == NULL) { + return 0; } + + border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg); + if (border > hint->search_start) + hint->search_start = border; - /* take a block off the prealloc list and return it -Hans */ - if (ei->i_prealloc_count > 0) { - ei->i_prealloc_count--; - *free_blocknrs = ei->i_prealloc_block++; + return 1; +} - /* if no more preallocated blocks, remove inode from list */ - if (! ei->i_prealloc_count) { - list_del_init(&ei->i_prealloc_list); - } - - return ret; - } +static void inline hundredth_slices (reiserfs_blocknr_hint_t * hint) +{ + struct key * key = &hint->key; + unsigned long slice_start; - /* else get a new preallocation for the file */ - reiserfs_discard_prealloc (th, p_s_inode); - /* this uses the last preallocated block as the search_start. discard - ** prealloc does not zero out this number. - */ - if (search_start <= ei->i_prealloc_block) { - search_start = ei->i_prealloc_block; - } + slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); + if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) { + hint->search_start = slice_start; + } +} - /* doing the compare again forces search_start to be >= the border, - ** even if the file already had prealloction done. 
This seems extra, - ** and should probably be removed - */ - if ( search_start < border ) search_start=border; - - /* If the disk free space is already below 10% we should - ** start looking for the free blocks from the beginning - ** of the partition, before the border line. - */ - if ( SB_FREE_BLOCKS(th->t_super) <= (SB_BLOCK_COUNT(th->t_super) / 10) ) { - search_start=saved_search_start; - } +static void inline determine_search_start(reiserfs_blocknr_hint_t *hint, + int amount_needed) +{ + struct super_block *s = hint->th->t_super; + hint->beg = 0; + hint->end = SB_BLOCK_COUNT(s) - 1; + + /* This is former border algorithm. Now with tunable border offset */ + if (concentrating_formatted_nodes(s)) + set_border_in_hint(s, hint); + +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + /* whenever we create a new directory, we displace it. At first we will + hash for location, later we might look for a moderately empty place for + it */ + if (displacing_new_packing_localities(s) + && hint->th->displace_new_blocks) { + displace_new_packing_locality(hint); + + /* we do not continue determine_search_start, + * if new packing locality is being displaced */ + return; + } +#endif + + /* all persons should feel encouraged to add more special cases here and + * test them */ - *free_blocknrs = 0; - blks = PREALLOCATION_SIZE-1; - for (blks_gotten=0; blks_gotten 0)/*must_be_contiguous*/) ; - /* if we didn't find a block this time, adjust blks to reflect - ** the actual number of blocks allocated - */ - if (ret != CARRY_ON) { - blks = blks_gotten > 0 ? 
(blks_gotten - 1) : 0 ; - break ; + if (displacing_large_files(s) && !hint->formatted_node + && this_blocknr_allocation_would_make_it_a_large_file(hint)) { + displace_large_file(hint); + return; } - allocated[blks_gotten]= *free_blocknrs; -#ifdef CONFIG_REISERFS_CHECK - if ( (blks_gotten>0) && (allocated[blks_gotten] - allocated[blks_gotten-1]) != 1 ) { - /* this should be caught by new_blocknrs now, checking code */ - reiserfs_warning("yura-1, reiserfs_new_unf_blocknrs2: pre-allocated not contiguous set of blocks!\n") ; - reiserfs_free_block(th, allocated[blks_gotten]); - blks = blks_gotten-1; - break; + + /* attempt to copy a feature from old block allocator code */ + if (TEST_OPTION(old_hashed_relocation, s) && !hint->formatted_node) { + old_hashed_relocation(hint); } -#endif - if (blks_gotten==0) { - ei->i_prealloc_block = *free_blocknrs; + + /* if none of our special cases is relevant, use the left neighbor in the + tree order of the new node we are allocating for */ + if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) { + hash_formatted_node(hint); + return; + } + + get_left_neighbor(hint); + + /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, + new blocks are displaced based on directory ID. 
Also, if suggested search_start + is less than last preallocated block, we start searching from it, assuming that + HDD dataflow is faster in forward direction */ + if ( TEST_OPTION(old_way, s)) { + if (!hint->formatted_node) { + if ( !reiserfs_hashed_relocation(s)) + old_way(hint); + else if (!reiserfs_no_unhashed_relocation(s)) + old_hashed_relocation(hint); + + if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block) + hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block; + } + return; } - search_start = *free_blocknrs; - *free_blocknrs = 0; - } - ei->i_prealloc_count = blks; - *free_blocknrs = ei->i_prealloc_block; - ei->i_prealloc_block++; - - /* if inode has preallocated blocks, link him to list */ - if (ei->i_prealloc_count) { - list_add(&ei->i_prealloc_list, - &SB_JOURNAL(th->t_super)->j_prealloc_list); - } - /* we did actually manage to get 1 block */ - if (ret != CARRY_ON && allocated[0] > 0) { - return CARRY_ON ; - } - /* NO_MORE_UNUSED_CONTIGUOUS_BLOCKS should only mean something to - ** the preallocation code. The rest of the filesystem asks for a block - ** and should either get it, or know the disk is full. The code - ** above should never allow ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCK, - ** as it doesn't send for_prealloc = 1 to do_reiserfs_new_blocknrs - ** unless it has already successfully allocated at least one block. - ** Just in case, we translate into a return value the rest of the - ** filesystem can understand. - ** - ** It is an error to change this without making the - ** rest of the filesystem understand NO_MORE_UNUSED_CONTIGUOUS_BLOCKS - ** If you consider it a bug to return NO_DISK_SPACE here, fix the rest - ** of the fs first. 
- */ - if (ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCKS) { -#ifdef CONFIG_REISERFS_CHECK - reiserfs_warning("reiser-2015: this shouldn't happen, may cause false out of disk space error"); -#endif - return NO_DISK_SPACE; - } - return ret; + + /* This is an approach proposed by Hans */ + if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) { + hundredth_slices(hint); + return; + } + + if (TEST_OPTION(old_hashed_relocation, s)) + old_hashed_relocation(hint); + if (TEST_OPTION(new_hashed_relocation, s)) + new_hashed_relocation(hint); + return; } +static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) +{ + /* make minimum size a mount option and benchmark both ways */ + /* we preallocate blocks only for regular files, specific size */ + /* benchmark preallocating always and see what happens */ -static void __discard_prealloc (struct reiserfs_transaction_handle * th, - struct reiserfs_inode_info *ei) + hint->prealloc_size = 0; + + if (!hint->formatted_node && hint->preallocate) { + if (S_ISREG(hint->inode->i_mode) + && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize) + hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1; + } + return CARRY_ON; +} + +/* XXX I know it could be merged with upper-level function; + but may be result function would be too complex. 
*/ +static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint, + b_blocknr_t * new_blocknrs, + b_blocknr_t start, b_blocknr_t finish, + int amount_needed, int prealloc_size) { - unsigned long save = ei->i_prealloc_block ; - while (ei->i_prealloc_count > 0) { - reiserfs_free_prealloc_block(th,ei->i_prealloc_block); - ei->i_prealloc_block++; - ei->i_prealloc_count --; - } - ei->i_prealloc_block = save; - list_del_init(&(ei->i_prealloc_list)); + int rest = amount_needed; + int nr_allocated; + + while (rest > 0) { + nr_allocated = scan_bitmap (hint->th, &start, finish, 1, + rest + prealloc_size, !hint->formatted_node, + hint->block); + + if (nr_allocated == 0) /* no new blocks allocated, return */ + break; + + /* fill free_blocknrs array first */ + while (rest > 0 && nr_allocated > 0) { + * new_blocknrs ++ = start ++; + rest --; nr_allocated --; + } + + /* do we have something to fill prealloc. array also ? */ + if (nr_allocated > 0) { + /* it means prealloc_size was greater that 0 and we do preallocation */ + list_add(&REISERFS_I(hint->inode)->i_prealloc_list, + &SB_JOURNAL(hint->th->t_super)->j_prealloc_list); + REISERFS_I(hint->inode)->i_prealloc_block = start; + REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated; + break; + } + } + + return (amount_needed - rest); } +static inline int blocknrs_and_prealloc_arrays_from_search_start + (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed) +{ + struct super_block *s = hint->th->t_super; + b_blocknr_t start = hint->search_start; + b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; + int second_pass = 0; + int nr_allocated = 0; + + determine_prealloc_size(hint); + while((nr_allocated + += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish, + amount_needed - nr_allocated, hint->prealloc_size)) + < amount_needed) { + + /* not all blocks were successfully allocated yet*/ + if (second_pass) { /* it was a second pass; we must free all blocks */ + while 
(nr_allocated --) + reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]); -void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, - struct inode * inode) + return NO_DISK_SPACE; + } else { /* refine search parameters for next pass */ + second_pass = 1; + finish = start; + start = 0; + continue; + } + } + return CARRY_ON; +} + +/* grab new blocknrs from preallocated list */ +/* return amount still needed after using them */ +static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint, + b_blocknr_t *new_blocknrs, int amount_needed) { - struct reiserfs_inode_info *ei = REISERFS_I(inode); -#ifdef CONFIG_REISERFS_CHECK - if (ei->i_prealloc_count < 0) - reiserfs_warning("zam-4001:%s inode has negative prealloc blocks count.\n", __FUNCTION__); -#endif - if (ei->i_prealloc_count > 0) { - __discard_prealloc(th, ei); - } + struct inode * inode = hint->inode; + + if (REISERFS_I(inode)->i_prealloc_count > 0) { + while (amount_needed) { + + *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++; + REISERFS_I(inode)->i_prealloc_count --; + + amount_needed --; + + if (REISERFS_I(inode)->i_prealloc_count <= 0) { + list_del(&REISERFS_I(inode)->i_prealloc_list); + break; + } + } } + /* return amount still needed after using preallocated blocks */ + return amount_needed; +} -void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) +int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, + b_blocknr_t * new_blocknrs, int amount_needed, + int reserved_by_us /* Amount of blocks we have + already reserved */) { - struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; - - while (!list_empty(plist)) { - struct reiserfs_inode_info *ei; - ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); -#ifdef CONFIG_REISERFS_CHECK - if (!ei->i_prealloc_count) { - reiserfs_warning("zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__); + int initial_amount_needed = 
amount_needed; + int ret; + + /* Check if there is enough space, taking into account reserved space */ + if ( SB_FREE_BLOCKS(hint->th->t_super) - REISERFS_SB(hint->th->t_super)->reserved_blocks < + amount_needed - reserved_by_us) + return NO_DISK_SPACE; + /* should this be if !hint->inode && hint->preallocate? */ + /* do you mean hint->formatted_node can be removed ? - Zam */ + /* hint->formatted_node cannot be removed because we try to access + inode information here, and there is often no inode assotiated with + metadata allocations - green */ + + if (!hint->formatted_node && hint->preallocate) { + amount_needed = use_preallocated_list_if_available + (hint, new_blocknrs, amount_needed); + if (amount_needed == 0) /* all blocknrs we need we got from + prealloc. list */ + return CARRY_ON; + new_blocknrs += (initial_amount_needed - amount_needed); + } + + /* find search start and save it in hint structure */ + determine_search_start(hint, amount_needed); + + /* allocation itself; fill new_blocknrs and preallocation arrays */ + ret = blocknrs_and_prealloc_arrays_from_search_start + (hint, new_blocknrs, amount_needed); + + /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we + * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second + * variant) */ + + if (ret != CARRY_ON) { + while (amount_needed ++ < initial_amount_needed) { + reiserfs_free_block(hint->th, *(--new_blocknrs)); } -#endif - __discard_prealloc(th, ei); } + return ret; +} + +/* These 2 functions are here to provide blocks reservation to the rest of kernel */ +/* Reserve @blocks amount of blocks in fs pointed by @sb. 
Caller must make sure + there are actually this much blocks on the FS available */ +void reiserfs_claim_blocks_to_be_allocated( + struct super_block *sb, /* super block of + filesystem where + blocks should be + reserved */ + int blocks /* How much to reserve */ + ) +{ + + /* Fast case, if reservation is zero - exit immediately. */ + if ( !blocks ) + return; + + REISERFS_SB(sb)->reserved_blocks += blocks; +} + +/* Unreserve @blocks amount of blocks in fs pointed by @sb */ +void reiserfs_release_claimed_blocks( + struct super_block *sb, /* super block of + filesystem where + blocks should be + reserved */ + int blocks /* How much to unreserve */ + ) +{ + + /* Fast case, if unreservation is zero - exit immediately. */ + if ( !blocks ) + return; + + REISERFS_SB(sb)->reserved_blocks -= blocks; + RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?"); } -#endif diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 0e6eeb845aaf..2079e51faca4 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -795,8 +795,8 @@ static int get_empty_nodes( else /* If we have enough already then there is nothing to do. 
*/ return CARRY_ON; - if ( reiserfs_new_blocknrs (p_s_tb->transaction_handle, a_n_blocknrs, - PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_blocknr, n_amount_needed) == NO_DISK_SPACE ) + if ( reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, + n_amount_needed) == NO_DISK_SPACE ) return NO_DISK_SPACE; /* for each blocknumber we just got, get a buffer and stick it on FEB */ diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c index 80f8c8d1ba0e..fb0c8ea1e189 100644 --- a/fs/reiserfs/hashes.c +++ b/fs/reiserfs/hashes.c @@ -19,6 +19,7 @@ // #include +#include @@ -57,7 +58,6 @@ u32 keyed_hash(const signed char *msg, int len) u32 pad; int i; - // assert(len >= 0 && len < 256); pad = (u32)len | ((u32)len << 8); @@ -92,7 +92,7 @@ u32 keyed_hash(const signed char *msg, int len) { //assert(len < 16); if (len >= 16) - *(int *)0 = 0; + BUG(); a = (u32)msg[ 0] | (u32)msg[ 1] << 8 | @@ -118,7 +118,7 @@ u32 keyed_hash(const signed char *msg, int len) { //assert(len < 12); if (len >= 12) - *(int *)0 = 0; + BUG(); a = (u32)msg[ 0] | (u32)msg[ 1] << 8 | (u32)msg[ 2] << 16| @@ -139,7 +139,7 @@ u32 keyed_hash(const signed char *msg, int len) { //assert(len < 8); if (len >= 8) - *(int *)0 = 0; + BUG(); a = (u32)msg[ 0] | (u32)msg[ 1] << 8 | (u32)msg[ 2] << 16| @@ -156,7 +156,7 @@ u32 keyed_hash(const signed char *msg, int len) { //assert(len < 4); if (len >= 4) - *(int *)0 = 0; + BUG(); a = b = c = d = pad; for(i = 0; i < len; i++) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9860253acca3..5b2862949193 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -156,33 +156,6 @@ static inline void fix_tail_page_for_writing(struct page *page) { } } - - - -/* we need to allocate a block for new unformatted node. Try to figure out - what point in bitmap reiserfs_new_blocknrs should start from. 
*/ -static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih, - __u32 * item, int pos_in_item) -{ - __u32 block ; - if (!is_indirect_le_ih (ih)) - /* something more complicated could be here */ - return bh->b_blocknr; - - /* for indirect item: go to left and look for the first non-hole entry in - the indirect item */ - if (pos_in_item == I_UNFM_NUM (ih)) - pos_in_item --; - while (pos_in_item >= 0) { - block = get_block_num(item, pos_in_item) ; - if (block) - return block ; - pos_in_item --; - } - return bh->b_blocknr; -} - - /* reiserfs_get_block does not need to allocate a block only if it has been done already or non-hole position has been found in the indirect item */ static inline int allocation_needed (int retval, b_blocknr_t allocated, @@ -341,10 +314,10 @@ research: ** kmap schedules */ if (!p) { - p = (char *)kmap(bh_result->b_page) ; - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - goto research; - } + p = (char *)kmap(bh_result->b_page) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } } p += offset ; memset (p, 0, inode->i_sb->s_blocksize); @@ -505,24 +478,24 @@ out: } static inline int _allocate_block(struct reiserfs_transaction_handle *th, + long block, struct inode *inode, b_blocknr_t *allocated_block_nr, - unsigned long tag, + struct path * path, int flags) { #ifdef REISERFS_PREALLOCATE if (!(flags & GET_BLOCK_NO_ISEM)) { - return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, tag); + return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); } #endif - return reiserfs_new_unf_blocknrs (th, allocated_block_nr, tag); + return reiserfs_new_unf_blocknrs (th, allocated_block_nr, path, block); } int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { int repeat, retval; - unsigned long tag; b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long INITIALIZE_PATH(path); int 
pos_in_item; @@ -601,7 +574,6 @@ int reiserfs_get_block (struct inode * inode, sector_t block, if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { /* we have to allocate block for the unformatted node */ - tag = find_tag (bh, ih, item, pos_in_item); if (!transaction_started) { pathrelse(&path) ; journal_begin(&th, inode->i_sb, jbegin_count) ; @@ -610,7 +582,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, goto research ; } - repeat = _allocate_block(&th, inode, &allocated_block_nr, tag, create); + repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create); if (repeat == NO_DISK_SPACE) { /* restart the transaction to give the journal a chance to free @@ -618,7 +590,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ** research if we succeed on the second try */ restart_transaction(&th, inode, &path) ; - repeat = _allocate_block(&th, inode,&allocated_block_nr,tag,create); + repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create); if (repeat != NO_DISK_SPACE) { goto research ; @@ -769,8 +741,8 @@ int reiserfs_get_block (struct inode * inode, sector_t block, add_to_flushlist(inode, unbh) ; /* mark it dirty now to prevent commit_write from adding - ** this buffer to the inode's dirty buffer list - */ + ** this buffer to the inode's dirty buffer list + */ /* * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 
* It's still atomic, but it sets the page dirty too, @@ -778,7 +750,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, * VM (which was also the case with __mark_buffer_dirty()) */ mark_buffer_dirty(unbh) ; - + //inode->i_blocks += inode->i_sb->s_blocksize / 512; //mark_tail_converted (inode); } else { @@ -1591,6 +1563,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, set_inode_sd_version (inode, STAT_DATA_V2); /* insert the stat data into the tree */ +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + if (REISERFS_I(dir)->new_packing_locality) + th->displace_new_blocks = 1; +#endif retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); if (retval) { err = retval; @@ -1598,6 +1574,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, goto out_bad_inode; } +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + if (!th->displace_new_blocks) + REISERFS_I(dir)->new_packing_locality = 0; +#endif if (S_ISDIR(mode)) { /* insert item with "." and ".." 
*/ retval = reiserfs_new_directory (th, &ih, &path_to_key, dir); @@ -1773,16 +1753,16 @@ void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { reiserfs_update_inode_transaction(p_s_inode) ; windex = push_journal_writer("reiserfs_vfs_truncate_file") ; if (update_timestamps) - /* we are doing real truncate: if the system crashes before the last - transaction of truncating gets committed - on reboot the file - either appears truncated properly or not truncated at all */ - add_save_link (&th, p_s_inode, 1); + /* we are doing real truncate: if the system crashes before the last + transaction of truncating gets committed - on reboot the file + either appears truncated properly or not truncated at all */ + add_save_link (&th, p_s_inode, 1); reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ; pop_journal_writer(windex) ; journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ; if (update_timestamps) - remove_save_link (p_s_inode, 1/* truncate */); + remove_save_link (p_s_inode, 1/* truncate */); if (page) { length = offset & (blocksize - 1) ; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 07c2f19adfe0..3fbb2199de37 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -510,14 +510,12 @@ int dump_journal_writers(void) { ** */ int reiserfs_in_journal(struct super_block *p_s_sb, - unsigned long bl, int search_all, + int bmap_nr, int bit_nr, int search_all, unsigned long *next_zero_bit) { struct reiserfs_journal_cnode *cn ; struct reiserfs_list_bitmap *jb ; int i ; - int bmap_nr = bl / (p_s_sb->s_blocksize << 3) ; - int bit_nr = bl % (p_s_sb->s_blocksize << 3) ; - int tmp_bit ; + unsigned long bl; *next_zero_bit = 0 ; /* always start this at zero. 
*/ @@ -537,15 +535,15 @@ int reiserfs_in_journal(struct super_block *p_s_sb, jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; if (jb->journal_list && jb->bitmaps[bmap_nr] && test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { - tmp_bit = find_next_zero_bit((unsigned long *) + *next_zero_bit = find_next_zero_bit((unsigned long *) (jb->bitmaps[bmap_nr]->data), p_s_sb->s_blocksize << 3, bit_nr+1) ; - *next_zero_bit = bmap_nr * (p_s_sb->s_blocksize << 3) + tmp_bit ; return 1 ; } } } + bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; /* is it in any old transactions? */ if (search_all && (cn = get_journal_hash_dev(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_hash_table, bl))) { return 1; @@ -1820,7 +1818,8 @@ static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_tas jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ; flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; - if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && + + if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && atomic_read(&(jl->j_commit_left)) == 0) { kupdate_one_transaction(ct->p_s_sb, jl) ; } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 6ff54b618291..c1c1505d9a04 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -661,6 +661,10 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) struct reiserfs_transaction_handle th ; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ + REISERFS_I(dir)->new_packing_locality = 1; +#endif mode = S_IFDIR | mode; if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM ; diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 5026e1acafa7..00ec09d9bc6e 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -111,7 +111,7 @@ int reiserfs_global_version_in_proc( char *buffer, char 
**start, off_t offset, #define SF( x ) ( r -> x ) #define SFP( x ) SF( s_proc_info_data.x ) #define SFPL( x ) SFP( x[ level ] ) -#define SFPF( x ) SFP( find_forward.x ) +#define SFPF( x ) SFP( scan_bitmap.x ) #define SFPJ( x ) SFP( journal.x ) #define D2C( x ) le16_to_cpu( x ) @@ -184,7 +184,7 @@ int reiserfs_super_in_proc( char *buffer, char **start, off_t offset, reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "", reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "", reiserfs_test4( sb ) ? "TEST4 " : "", - dont_have_tails( sb ) ? "NO_TAILS " : "TAILS ", + have_large_tails( sb ) ? "TAILS " : have_small_tails(sb)?"SMALL_TAILS ":"NO_TAILS ", replay_only( sb ) ? "REPLAY_ONLY " : "", reiserfs_dont_log( sb ) ? "DONT_LOG " : "LOG ", convert_reiserfs( sb ) ? "CONV " : "", @@ -314,27 +314,30 @@ int reiserfs_bitmap_in_proc( char *buffer, char **start, off_t offset, r = REISERFS_SB(sb); len += sprintf( &buffer[ len ], "free_block: %lu\n" - "find_forward:" - " wait" - " bmap" - " retry" - " journal_hint" - " journal_out" + " scan_bitmap:" + " wait" + " bmap" + " retry" + " stolen" + " journal_hint" + "journal_nohint" "\n" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" - " %12lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" + " %14lu" "\n", SFP( free_block ), SFPF( call ), SFPF( wait ), SFPF( bmap ), SFPF( retry ), + SFPF( stolen ), SFPF( in_journal_hint ), - SFPF( in_journal_out ) ); + SFPF( in_journal_nohint ) ); procinfo_epilogue( sb ); return reiserfs_proc_tail( len, buffer, start, offset, count, eof ); diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 1b4581806a8f..c357fa53001e 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -19,7 +19,8 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) { struct reiserfs_super_block * sb; - struct buffer_head ** bitmap, * bh; + struct reiserfs_bitmap_info *bitmap; + struct buffer_head * bh; struct 
reiserfs_transaction_handle th; unsigned int bmap_nr_new, bmap_nr; unsigned int block_r_new, block_r; @@ -103,26 +104,29 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) /* allocate additional bitmap blocks, reallocate array of bitmap * block pointers */ - bitmap = reiserfs_kmalloc(sizeof(struct buffer_head *) * bmap_nr_new, GFP_KERNEL, s); + bitmap = vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); if (!bitmap) { printk("reiserfs_resize: unable to allocate memory.\n"); return -ENOMEM; } + memset (bitmap, 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); for (i = 0; i < bmap_nr; i++) bitmap[i] = SB_AP_BITMAP(s)[i]; for (i = bmap_nr; i < bmap_nr_new; i++) { - bitmap[i] = sb_getblk(s, i * s->s_blocksize * 8); - memset(bitmap[i]->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bitmap[i]->b_data); - - mark_buffer_dirty(bitmap[i]) ; - set_buffer_uptodate(bitmap[i]); - ll_rw_block(WRITE, 1, bitmap + i); - wait_on_buffer(bitmap[i]); + bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); + memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); + reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); + + mark_buffer_dirty(bitmap[i].bh) ; + set_buffer_uptodate(bitmap[i].bh); + ll_rw_block(WRITE, 1, &bitmap[i].bh); + wait_on_buffer(bitmap[i].bh); + // update bitmap_info stuff + bitmap[i].first_zero_hint=1; + bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; } /* free old bitmap blocks array */ - reiserfs_kfree(SB_AP_BITMAP(s), - sizeof(struct buffer_head *) * bmap_nr, s); + vfree(SB_AP_BITMAP(s)); SB_AP_BITMAP(s) = bitmap; } @@ -130,18 +134,26 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) journal_begin(&th, s, 10); /* correct last bitmap blocks in old and new disk layout */ - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1], 1); + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) 
reiserfs_test_and_clear_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr - 1]->b_data); - journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1]); + SB_AP_BITMAP(s)[bmap_nr - 1].bh->b_data); + SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r; + if ( !SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) + SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; + + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh); - reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1], 1); + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) reiserfs_test_and_set_le_bit(i, - SB_AP_BITMAP(s)[bmap_nr_new - 1]->b_data); - journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1]); + SB_AP_BITMAP(s)[bmap_nr_new - 1].bh->b_data); + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh); + SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= s->s_blocksize * 8 - block_r_new; + /* Extreme case where last bitmap is the only valid block in itself. */ + if ( !SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count ) + SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; /* update super */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; free_blocks = SB_FREE_BLOCKS(s); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index d67e6109b17f..560f1f336111 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1620,9 +1620,9 @@ int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, do_balance(&s_cut_balance, NULL, NULL, c_mode); if ( n_is_inode_locked ) { - /* we've done an indirect->direct conversion. when the data block - ** was freed, it was removed from the list of blocks that must - ** be flushed before the transaction commits, so we don't need to + /* we've done an indirect->direct conversion. when the data block + ** was freed, it was removed from the list of blocks that must + ** be flushed before the transaction commits, so we don't need to ** deal with it here. 
*/ REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ; @@ -1813,6 +1813,9 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, int retval; init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + s_paste_balance.key = p_s_key->on_disk_key; +#endif while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) { /* file system changed while we were in the fix_nodes */ @@ -1823,7 +1826,7 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, goto error_out ; } if (retval == POSITION_FOUND) { - reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); + reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists\n", p_s_key); retval = -EEXIST ; goto error_out ; } @@ -1858,6 +1861,9 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, int retval; init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + s_ins_balance.key = key->on_disk_key; +#endif /* if (p_c_body == 0) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7f1b517547e2..bb702e26a4c2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -374,9 +375,9 @@ static void reiserfs_put_super (struct super_block * s) journal_release(&th, s) ; for (i = 0; i < SB_BMAP_NR (s); i ++) - brelse (SB_AP_BITMAP (s)[i]); + brelse (SB_AP_BITMAP (s)[i].bh); - reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + vfree (SB_AP_BITMAP (s)); brelse (SB_BUFFER_WITH_SB (s)); @@ -387,6 +388,11 @@ static void reiserfs_put_super (struct super_block * s) REISERFS_SB(s)->s_kmallocs); } + if (REISERFS_SB(s)->reserved_blocks != 0) { + reiserfs_warning ("green-2005: reiserfs_put_super: reserved blocks left %d\n", + 
REISERFS_SB(s)->reserved_blocks); + } + reiserfs_proc_unregister( s, "journal" ); reiserfs_proc_unregister( s, "oidmap" ); reiserfs_proc_unregister( s, "on-disk-super" ); @@ -518,6 +524,13 @@ const arg_desc_t balloc[] = { {NULL, -1} }; +const arg_desc_t tails[] = { + {"on", REISERFS_LARGETAIL}, + {"off", -1}, + {"small", REISERFS_SMALLTAIL}, + {NULL, 0} +}; + /* proceed only one option from a list *cur - string containing of mount options opts - array of options which are accepted @@ -525,7 +538,7 @@ const arg_desc_t balloc[] = { in the input - pointer to the argument is stored here bit_flags - if option requires to set a certain bit - it is set here return -1 if unknown option is found, opt->arg_required otherwise */ -static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, +static int reiserfs_getopt ( struct super_block * s, char ** cur, opt_desc_t * opts, char ** opt_arg, unsigned long * bit_flags) { char * p; @@ -547,7 +560,16 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, *(*cur) = '\0'; (*cur) ++; } - + + if ( !strncmp (p, "alloc=", 6) ) { + /* Ugly special case, probably we should redo options parser so that + it can understand several arguments for some options, also so that + it can fill several bitfields with option values. 
*/ + reiserfs_parse_alloc_options( s, p + 6); + return 0; + } + + /* for every option in the list */ for (opt = opts; opt->option_name; opt ++) { if (!strncmp (p, opt->option_name, strlen (opt->option_name))) { @@ -612,7 +634,7 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, /* returns 0 if something is wrong in option string, 1 - otherwise */ -static int reiserfs_parse_options (char * options, /* string given via mount's -o */ +static int reiserfs_parse_options (struct super_block * s, char * options, /* string given via mount's -o */ unsigned long * mount_options, /* after the parsing phase, contains the collection of bitflags defining what @@ -624,14 +646,14 @@ static int reiserfs_parse_options (char * options, /* string given via mount's - char * arg = NULL; char * pos; opt_desc_t opts[] = { - {"notail", 0, 0, NOTAIL}, + {"tails", 't', tails, -1}, + {"notail", 0, 0, -1}, /* Compatibility stuff, so that -o notail +for old setups still work */ {"conv", 0, 0, REISERFS_CONVERT}, {"attrs", 0, 0, REISERFS_ATTRS}, {"nolog", 0, 0, -1}, {"replayonly", 0, 0, REPLAYONLY}, - {"block-allocator", 'a', balloc, -1}, - {"resize", 'r', 0, -1}, {"jdev", 'j', 0, -1}, {NULL, 0, 0, -1} @@ -642,9 +664,12 @@ static int reiserfs_parse_options (char * options, /* string given via mount's - /* use default configuration: create tails, journaling on, no conversion to newest format */ return 1; + else + /* Drop defaults to zeroes */ + *mount_options = 0; for (pos = options; pos; ) { - c = reiserfs_getopt (&pos, opts, &arg, mount_options); + c = reiserfs_getopt (s, &pos, opts, &arg, mount_options); if (c == -1) /* wrong option is given */ return 0; @@ -681,7 +706,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a rs = SB_DISK_SUPER_BLOCK (s); - if (!reiserfs_parse_options(arg, &mount_options, &blocks, NULL)) + if (!reiserfs_parse_options(s, arg, &mount_options, &blocks, NULL)) return -EINVAL; if(blocks) { @@ -731,32 +756,84 
@@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a return 0; } +/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. + * @sb - superblock for this filesystem + * @bi - the bitmap info to be loaded. Requires that bi->bh is valid. + * + * This routine counts how many free bits there are, finding the first zero + * as a side effect. Could also be implemented as a loop of test_bit() calls, or + * a loop of find_first_zero_bit() calls. This implementation is similar to + * find_first_zero_bit(), but doesn't return after it finds the first bit. + * Should only be called on fs mount, but should be fairly efficient anyways. + * + * bi->first_zero_hint is considered unset if it == 0, since the bitmap itself + * will * invariably occupt block 0 represented in the bitmap. The only + * exception to this is when free_count also == 0, since there will be no + * free blocks at all. + */ + +static void load_bitmap_info_data (struct super_block *sb, + struct reiserfs_bitmap_info *bi) +{ + unsigned long *cur = (unsigned long *)bi->bh->b_data; + + while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { + + /* No need to scan if all 0's or all 1's. 
+ * Since we're only counting 0's, we can simply ignore all 1's */ + if (*cur == 0) { + if (bi->first_zero_hint == 0) { + bi->first_zero_hint = ((char *)cur - bi->bh->b_data) << 3; + } + bi->free_count += sizeof(unsigned long)*8; + } else if (*cur != ~0L) { + int b; + for (b = 0; b < sizeof(unsigned long)*8; b++) { + if (!reiserfs_test_le_bit (b, cur)) { + bi->free_count ++; + if (bi->first_zero_hint == 0) + bi->first_zero_hint = + (((char *)cur - bi->bh->b_data) << 3) + b; + } + } + } + cur ++; + } +#ifdef CONFIG_REISERFS_CHECK +// This outputs a lot of unneded info on big FSes +// reiserfs_warning ("bitmap loaded from block %d: %d free blocks\n", +// bi->bh->b_blocknr, bi->free_count); +#endif +} + static int read_bitmaps (struct super_block * s) { int i, bmap_nr; - SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * SB_BMAP_NR(s), GFP_NOFS, s); + SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); if (SB_AP_BITMAP (s) == 0) return 1; + memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { - SB_AP_BITMAP (s)[i] = sb_getblk(s, bmap_nr); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) - ll_rw_block(READ, 1, SB_AP_BITMAP(s) + i); + SB_AP_BITMAP (s)[i].bh = sb_getblk(s, bmap_nr); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) + ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); } for (i = 0; i < SB_BMAP_NR(s); i++) { - wait_on_buffer(SB_AP_BITMAP (s)[i]); - if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { + wait_on_buffer(SB_AP_BITMAP (s)[i].bh); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { reiserfs_warning("sh-2029: reiserfs read_bitmaps: " "bitmap block (#%lu) reading failed\n", - SB_AP_BITMAP(s)[i]->b_blocknr); + SB_AP_BITMAP(s)[i].bh->b_blocknr); for (i = 0; i < SB_BMAP_NR(s); i++) - brelse(SB_AP_BITMAP(s)[i]); - reiserfs_kfree(SB_AP_BITMAP(s), sizeof(struct 
buffer_head *) * SB_BMAP_NR(s), s); + brelse(SB_AP_BITMAP(s)[i].bh); + vfree(SB_AP_BITMAP(s)); SB_AP_BITMAP(s) = NULL; return 1; } + load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); } return 0; } @@ -768,16 +845,17 @@ static int read_old_bitmaps (struct super_block * s) int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ /* read true bitmap */ - SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * sb_bmap_nr(rs), GFP_NOFS, s); + SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); if (SB_AP_BITMAP (s) == 0) return 1; - memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * sb_bmap_nr(rs)); + memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); for (i = 0; i < sb_bmap_nr(rs); i ++) { - SB_AP_BITMAP (s)[i] = sb_bread (s, bmp1 + i); - if (!SB_AP_BITMAP (s)[i]) + SB_AP_BITMAP (s)[i].bh = sb_bread (s, bmp1 + i); + if (!SB_AP_BITMAP (s)[i].bh) return 1; + load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); } return 0; @@ -790,7 +868,7 @@ void check_bitmap (struct super_block * s) char * buf; while (i < SB_BLOCK_COUNT (s)) { - buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)]->b_data; + buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)].bh->b_data; if (!reiserfs_test_le_bit (i % (s->s_blocksize * 8), buf)) free ++; i ++; @@ -899,10 +977,11 @@ static int reread_meta_blocks(struct super_block *s) { } for (i = 0; i < SB_BMAP_NR(s) ; i++) { - ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i])) ; - wait_on_buffer(SB_AP_BITMAP(s)[i]) ; - if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { - printk("reread_meta_blocks, error reading bitmap block number %d at %ld\n", i, SB_AP_BITMAP(s)[i]->b_blocknr) ; + ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)) ; + wait_on_buffer(SB_AP_BITMAP(s)[i].bh) ; + if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { + printk("reread_meta_blocks, error reading bitmap block number %d at + %ld\n", i, SB_AP_BITMAP(s)[i].bh->b_blocknr) ; return 1 ; 
} } @@ -1087,9 +1166,17 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent) } s->u.generic_sbp = sbi; memset (sbi, 0, sizeof (struct reiserfs_sb_info)); + /* Set default values for options: non-aggressive tails */ + REISERFS_SB(s)->s_mount_opt = ( 1 << REISERFS_SMALLTAIL ); + /* default block allocator option: skip_busy */ + REISERFS_SB(s)->s_alloc_options.bits = ( 1 << 5); + /* If file grew past 4 blocks, start preallocation blocks for it. */ + REISERFS_SB(s)->s_alloc_options.preallocmin = 4; + /* Preallocate by 8 blocks (9-1) at once */ + REISERFS_SB(s)->s_alloc_options.preallocsize = 9; jdev_name = NULL; - if (reiserfs_parse_options ((char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name) == 0) { + if (reiserfs_parse_options (s, (char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name) == 0) { goto error; } @@ -1236,10 +1323,10 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent) if (SB_DISK_SUPER_BLOCK (s)) { for (j = 0; j < SB_BMAP_NR (s); j ++) { if (SB_AP_BITMAP (s)) - brelse (SB_AP_BITMAP (s)[j]); + brelse (SB_AP_BITMAP (s)[j].bh); } if (SB_AP_BITMAP (s)) - reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + vfree (SB_AP_BITMAP (s)); } if (SB_BUFFER_WITH_SB (s)) brelse(SB_BUFFER_WITH_SB (s)); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index b3a2a5a1a78c..a5693b3f4a3f 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -59,7 +59,8 @@ #define USE_INODE_GENERATION_COUNTER #define REISERFS_PREALLOCATE -#define PREALLOCATION_SIZE 8 +#define DISPLACE_NEW_PACKING_LOCALITIES +#define PREALLOCATION_SIZE 9 /* n must be power of 2 */ #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) @@ -326,7 +327,7 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) time cost for a 4 block file and saves an amount of space that is less significant as a percentage of space, or so goes the hypothesis. 
-Hans */ -#define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ +#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ @@ -339,6 +340,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ ) +/* Another strategy for tails, this one means only create a tail if all the + file would fit into one DIRECT item. + Primary intention for this one is to increase performance by decreasing + seeking. +*/ +#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ +) + + /* * values for s_umount_state field @@ -1433,6 +1446,10 @@ struct tree_balance int fs_gen; /* saved value of `reiserfs_generation' counter see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ +#ifdef DISPLACE_NEW_PACKING_LOCALITIES + struct key key; /* key pointer, to pass to block allocator or + another low-level subsystem */ +#endif } ; /* These are modes of balancing */ @@ -1673,7 +1690,7 @@ int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block int push_journal_writer(char *w) ; int pop_journal_writer(int windex) ; int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; -int reiserfs_in_journal(struct super_block *p_s_sb, unsigned long bl, int searchall, unsigned long *next) ; +int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, int searchall, unsigned long *next) ; int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; void flush_async_commits(struct super_block *p_s_sb) ; @@ -1818,8 +1835,8 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, #define file_size(inode) ((inode)->i_size) #define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) -#define 
tail_has_to_be_packed(inode) (!dont_have_tails ((inode)->i_sb) &&\ -!STORE_TAIL_IN_UNFM(file_size (inode), tail_size(inode), i_block_size (inode))) +#define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ +!STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) void padd_item (char * item, int total_length, int length); @@ -2015,22 +2032,87 @@ void make_empty_node (struct buffer_info *); struct buffer_head * get_FEB (struct tree_balance *); /* bitmap.c */ + +/* structure contains hints for block allocator, and it is a container for + * arguments, such as node, search path, transaction_handle, etc. */ + struct __reiserfs_blocknr_hint { + struct inode * inode; /* inode passed to allocator, if we allocate unf. nodes */ + long block; /* file offset, in blocks */ + struct key key; + struct path * path; /* search path, used by allocator to deternine search_start by + * various ways */ + struct reiserfs_transaction_handle * th; /* transaction handle is needed to log super blocks and + * bitmap blocks changes */ + b_blocknr_t beg, end; + b_blocknr_t search_start; /* a field used to transfer search start value (block number) + * between different block allocator procedures + * (determine_search_start() and others) */ + int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed + * function that do actual allocation */ + + int formatted_node:1; /* the allocator uses different polices for getting disk space for + * formatted/unformatted blocks with/without preallocation */ + int preallocate:1; +}; + +typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; + +int reiserfs_parse_alloc_options (struct super_block *, char *); int is_reusable (struct super_block * s, unsigned long block, int bit_value); void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned 
long); -int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, - unsigned long * pblocknrs, unsigned long start_from, int amount_needed); -int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, - unsigned long * pblocknr, unsigned long start_from); +int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int); +extern inline int reiserfs_new_form_blocknrs (struct tree_balance * tb, + b_blocknr_t *new_blocknrs, int amount_needed) +{ + reiserfs_blocknr_hint_t hint = { + th:tb->transaction_handle, + path: tb->tb_path, + inode: NULL, + key: tb->key, + block: 0, + formatted_node:1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, 0); +} + +extern inline int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, + b_blocknr_t *new_blocknrs, + struct path * path, long block) +{ + reiserfs_blocknr_hint_t hint = { + th: th, + path: path, + inode: NULL, + block: block, + formatted_node: 0, + preallocate: 0 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); +} + #ifdef REISERFS_PREALLOCATE -int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, - struct inode * inode, - unsigned long * pblocknr, - unsigned long start_from); +extern inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th, + struct inode * inode, + b_blocknr_t *new_blocknrs, + struct path * path, long block) +{ + reiserfs_blocknr_hint_t hint = { + th: th, + path: path, + inode: inode, + block: block, + formatted_node: 0, + preallocate: 1 + }; + return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); +} void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, struct inode * inode); void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th); #endif +void reiserfs_claim_blocks_to_be_allocated( struct super_block *sb, int blocks); +void reiserfs_release_claimed_blocks( struct super_block *sb, int blocks); /* hashes.c */ __u32 
keyed_hash (const signed char *msg, int len); diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h index d76abebe56a8..5c6b26b1d9b5 100644 --- a/include/linux/reiserfs_fs_i.h +++ b/include/linux/reiserfs_fs_i.h @@ -37,6 +37,10 @@ struct reiserfs_inode_info { struct list_head i_prealloc_list; /* per-transaction list of inodes which * have preallocated blocks */ + int new_packing_locality:1; /* new_packig_locality is created; new blocks + * for the contents of this directory should be + * displaced */ + /* we use these for fsync or O_SYNC to decide which transaction ** needs to be committed in order for this inode to be properly ** flushed */ diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 13632da04604..74bd8e0a1d3e 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -160,7 +160,10 @@ struct reiserfs_transaction_handle { int t_blocks_allocated ; /* number of blocks this writer allocated */ unsigned long t_trans_id ; /* sanity check, equals the current trans id */ struct super_block *t_super ; /* super for this FS when journal_begin was - called. */ + called. 
saves calls to reiserfs_get_super */ + int displace_new_blocks:1; /* if new block allocation occurres, that block + should be displaced from others */ + } ; /* @@ -254,6 +257,14 @@ struct reiserfs_journal { typedef __u32 (*hashf_t) (const signed char *, int); +struct reiserfs_bitmap_info +{ + // FIXME: Won't work with block sizes > 8K + __u16 first_zero_hint; + __u16 free_count; + struct buffer_head *bh; /* the actual bitmap */ +}; + struct proc_dir_entry; #if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) @@ -298,14 +309,15 @@ typedef struct reiserfs_proc_info_data stat_cnt_t need_r_neighbor[ 5 ]; stat_cnt_t free_block; - struct __find_forward_stats { + struct __scan_bitmap_stats { stat_cnt_t call; stat_cnt_t wait; stat_cnt_t bmap; stat_cnt_t retry; stat_cnt_t in_journal_hint; - stat_cnt_t in_journal_out; - } find_forward; + stat_cnt_t in_journal_nohint; + stat_cnt_t stolen; + } scan_bitmap; struct __journal_stats { stat_cnt_t in_journal; stat_cnt_t in_journal_bitmap; @@ -335,7 +347,7 @@ struct reiserfs_sb_info /* both the comment and the choice of name are unclear for s_rs -Hans */ struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap */ + struct reiserfs_bitmap_info * s_ap_bitmap; struct reiserfs_journal *s_journal ; /* pointer to journal information */ unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ @@ -348,6 +360,16 @@ struct reiserfs_sb_info here (currently - NOTAIL, NOLOG, REPLAYONLY) */ + struct { /* This is a structure that describes block allocator options */ + unsigned long bits; /* Bitfield for enable/disable kind of options */ + unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ + int border; /* percentage of disk, border takes */ + int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ + int preallocsize; /* 
Number of blocks we try to prealloc when file + reaches preallocmin size (in blocks) or + prealloc_list is empty. */ + } s_alloc_options; + /* Comment? -Hans */ wait_queue_head_t s_wait; /* To be obsoleted soon by per buffer seals.. -Hans */ @@ -374,6 +396,7 @@ struct reiserfs_sb_info int s_is_unlinked_ok; reiserfs_proc_info_data_t s_proc_info_data; struct proc_dir_entry *procdir; + int reserved_blocks; /* amount of blocks reserved for further allocations */ }; /* Definitions of reiserfs on-disk properties: */ @@ -381,7 +404,8 @@ struct reiserfs_sb_info #define REISERFS_3_6 1 /* Mount options */ -#define NOTAIL 0 /* -o notail: no tails will be created in a session */ +#define REISERFS_LARGETAIL 0 /* large tails will be created in a session */ +#define REISERFS_SMALLTAIL 17 /* small (for files less than block size) tails will be created in a session */ #define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ #define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ #define REISERFS_CONVERT 5 /* -o conv: causes conversion of old @@ -429,7 +453,8 @@ struct reiserfs_sb_info #define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) #define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) -#define dont_have_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << NOTAIL)) +#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL)) +#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL)) #define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) #define reiserfs_dont_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NOLOG)) #define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) -- cgit v1.2.3 From b315226781b180732abb0aa4ead6322b8c7827c6 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:26 -0700 Subject: [PATCH] (1/25) Unexporting helper functions wipe_partitions() and 
driverfs_register_partitions(..., 1) (i.e. unregistering them) pulled into del_gendisk() and removed from callers. grok_partitions() merged with register_disk(). devfs_register_partitions(), grok_partitions() and wipe_partitions() not exported anymore. --- drivers/block/cciss.c | 2 -- drivers/block/cpqarray.c | 1 - drivers/block/genhd.c | 14 ++++++++------ drivers/block/umem.c | 4 +--- drivers/message/i2o/i2o_block.c | 2 -- drivers/mtd/ftl.c | 1 - drivers/s390/block/dasd_genhd.c | 8 -------- drivers/scsi/sd.c | 3 --- fs/partitions/check.c | 10 +--------- include/linux/blkdev.h | 1 - kernel/ksyms.c | 3 --- 11 files changed, 10 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index db3d25ebfd35..df336db3a336 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -742,7 +742,6 @@ static int revalidate_allvol(kdev_t dev) for(i=0; i< NWD; i++) { struct gendisk *disk = &hba[ctlr]->gendisk[i]; if (disk->major_name) { - wipe_partitions(mk_kdev(disk->major, disk->first_minor)); del_gendisk(disk); disk->major_name = NULL; } @@ -802,7 +801,6 @@ static int deregister_disk(int ctlr, int logvol) /* invalidate the devices and deregister the disk */ if (disk->major_name) { - wipe_partitions(mk_kdev(disk->major, disk->first_minor)); del_gendisk(disk); disk->major_name = NULL; } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index b9dfeb902e3c..aba33e13c9a6 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1461,7 +1461,6 @@ static int revalidate_allvol(kdev_t dev) struct gendisk *disk = ida_gendisk + ctlr*NWD + i; if (!disk->major_name) continue; - wipe_partitions(mk_kdev(disk->major, disk->first_minor)); del_gendisk(disk); disk->major_name = NULL; } diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index e17207282c98..4535038fe435 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -77,17 +77,19 @@ EXPORT_SYMBOL(add_gendisk); * with the 
kernel. */ void -del_gendisk(struct gendisk *gp) +del_gendisk(struct gendisk *disk) { - struct gendisk **gpp; + struct gendisk **p; + wipe_partitions(mk_kdev(disk->major, disk->first_minor)); write_lock(&gendisk_lock); - for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next)) - if (*gpp == gp) + for (p = &gendisk_head; *p; p = &((*p)->next)) + if (*p == disk) break; - if (*gpp) - *gpp = (*gpp)->next; + if (*p) + *p = (*p)->next; write_unlock(&gendisk_lock); + devfs_register_partitions(disk, disk->first_minor, 1); } EXPORT_SYMBOL(del_gendisk); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 482aa8634984..b326b6df938b 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -1222,10 +1222,8 @@ void __exit mm_cleanup(void) del_battery_timer(); - for (i=0; i < num_cards ; i++) { - devfs_register_partitions(mm_gendisk + i, i<>4]; - wipe_partitions(mk_kdev(MAJOR_NR, unit)); del_gendisk(p); for(i = unit; i <= unit+15; i++) blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); @@ -1616,7 +1615,6 @@ void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d) * This will force errors when i2ob_get_queue() is called * by the kenrel. 
*/ - wipe_partitions(mk_kdev(MAJOR_NR, unit)); del_gendisk(&i2o_disk[unit>>4]); i2ob_dev[unit].req_queue = NULL; for(i = unit; i <= unit+15; i++) diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 83e5ba3c3f7f..268f8e363a90 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1298,7 +1298,6 @@ static void ftl_notify_remove(struct mtd_info *mtd) ftl_freepart(myparts[i]); myparts[i]->state = 0; - wipe_partitions(mk_kdev(MAJOR_NR, i<<4)); del_gendisk(myparts[i]->disk); kfree(myparts[i]->disk->name); kfree(myparts[i]->disk); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 5733d3f466f5..2becf7eb8567 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -284,14 +284,6 @@ dasd_destroy_partitions(dasd_device_t * device) if (disk == NULL) return; - wipe_partitions(device->kdev); - - /* - * This is confusing. The funcions is devfs_register_partitions - * but the 1 as third parameter makes it do an unregister... - * FIXME: there must be a better way to get rid of the devfs entries - */ - devfs_register_partitions(disk, minor(device->kdev), 1); del_gendisk(disk); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 09f49e427f16..8c1e5d9ef3b4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1426,9 +1426,6 @@ static void sd_detach(Scsi_Device * sdp) sdkp->has_been_registered = 0; dev = MKDEV_SD(dsk_nr); driverfs_remove_partitions(sd_disks[dsk_nr], minor(dev)); - wipe_partitions(dev); - devfs_register_partitions (sd_disks[dsk_nr], minor(dev), 1); - /* unregister_disk() */ del_gendisk(sd_disks[dsk_nr]); } sdp->attached--; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 907cb1faf056..5c3bc6cdd864 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -414,18 +414,10 @@ void devfs_register_partitions (struct gendisk *dev, int minor, int unregister) * done */ -void register_disk(struct gendisk *gdev, kdev_t dev, unsigned minors, +void register_disk(struct 
gendisk *g, kdev_t dev, unsigned minors, struct block_device_operations *ops, long size) -{ - if (!gdev) - return; - grok_partitions(dev, size); -} - -void grok_partitions(kdev_t dev, long size) { struct block_device *bdev; - struct gendisk *g = get_gendisk(dev); struct hd_struct *p; if (!g) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9f1ac6d07fe8..e0fd1bbd5520 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -281,7 +281,6 @@ struct sec_size { extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(kdev_t dev, long size); extern int wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void check_partition(struct gendisk *disk, struct block_device *bdev); diff --git a/kernel/ksyms.c b/kernel/ksyms.c index ad618a550f86..df1b2e47e919 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -335,17 +335,14 @@ EXPORT_SYMBOL(blk_dev); EXPORT_SYMBOL(bdev_read_only); EXPORT_SYMBOL(set_device_ro); EXPORT_SYMBOL(bmap); -EXPORT_SYMBOL(devfs_register_partitions); EXPORT_SYMBOL(driverfs_remove_partitions); EXPORT_SYMBOL(blkdev_open); EXPORT_SYMBOL(blkdev_get); EXPORT_SYMBOL(blkdev_put); EXPORT_SYMBOL(ioctl_by_bdev); -EXPORT_SYMBOL(grok_partitions); EXPORT_SYMBOL(register_disk); EXPORT_SYMBOL(read_dev_sector); EXPORT_SYMBOL(init_buffer); -EXPORT_SYMBOL(wipe_partitions); EXPORT_SYMBOL_GPL(generic_file_direct_IO); /* tty routines */ -- cgit v1.2.3 From 4e49388694356ae25d69772092ade844fac54e61 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:31 -0700 Subject: [PATCH] (2/25) Removing ->nr_real Since ->nr_real is always 1 now, we can remove that field completely. Removed the last remnants of switch in disk_name() (it could be killed a long time ago, I just forgot to remove the last two cases when md and i2o got converted). 
Collapsed several instances of disk->part[minor - disk->first_minor] - in cases when we know that we deal with disk->part[0]. --- drivers/acorn/block/mfmhd.c | 1 - drivers/block/DAC960.c | 1 - drivers/block/acsi.c | 1 - drivers/block/cciss.c | 1 - drivers/block/cpqarray.c | 1 - drivers/block/genhd.c | 5 ++--- drivers/block/paride/pd.c | 1 - drivers/block/ps2esdi.c | 2 -- drivers/block/umem.c | 1 - drivers/block/xd.c | 2 -- drivers/ide/hd.c | 1 - drivers/ide/ide-probe.c | 1 - drivers/md/md.c | 1 - drivers/message/i2o/i2o_block.c | 1 - drivers/mtd/ftl.c | 1 - drivers/mtd/nftlcore.c | 1 - drivers/s390/block/dasd_genhd.c | 1 - drivers/scsi/sd.c | 1 - fs/block_dev.c | 3 +-- fs/partitions/check.c | 43 +++++++++++------------------------------ include/linux/genhd.h | 2 -- 21 files changed, 14 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c index d1f952e1643b..38f7dfc9e82e 100644 --- a/drivers/acorn/block/mfmhd.c +++ b/drivers/acorn/block/mfmhd.c @@ -1280,7 +1280,6 @@ static void mfm_geninit (void) outw(0x80, mfm_irqenable); /* Required to enable IRQs from MFM podule */ for (i = 0; i < mfm_drives; i++) { - mfm_gendisk[i].nr_real = 1; add_gendisk(mfm_gendisk + i); mfm_geometry (i); register_disk(mfm_gendisk + i, mk_kdev(MAJOR_NR,i<<6), 1<<6, diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 4256cbb35e78..72d50aa7b8f2 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1973,7 +1973,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) disk->first_minor = n << DAC960_MaxPartitionsBits; disk->major_name = names + 9 * n; disk->minor_shift = DAC960_MaxPartitionsBits; - disk->nr_real = 1; disk->fops = &DAC960_BlockDeviceOperations; add_gendisk(disk); } diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index ff0480293480..e333d4d375c7 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -1704,7 +1704,6 @@ static void 
acsi_geninit(void) disk->minor_shift = (acsi_info[i].type==HARDDISK)?4:0; disk->part = acsi_part + (i<<4); disk->fops = &acsi_fops; - disk->nr_real = 1; add_gendisk(disk); register_disk(disk, mk_kdev(disk->major, disk->first_minor), 1<<disk->minor_shift, diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index df336db3a336..73f5888010d2 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2447,7 +2447,6 @@ static int __init cciss_init_one(struct pci_dev *pdev, disk->major_name = NULL; disk->minor_shift = NWD_SHIFT; disk->part = hba[i]->hd + (j << NWD_SHIFT); - disk->nr_real = 1; if( !(drv->nr_blocks)) continue; (BLK_DEFAULT_QUEUE(MAJOR_NR + i))->hardsect_size = drv->block_size; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index aba33e13c9a6..396a114b4e24 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -451,7 +451,6 @@ int __init cpqarray_init(void) disk->first_minor = j<<NWD_SHIFT; disk->minor_shift = NWD_SHIFT; disk->part = ida + i*256 + (j<<NWD_SHIFT); - disk->nr_real = 1; disk->de_arr = &de_arr[i][j]; disk->fops = &ida_fops; if (!drv->nr_blks) diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 4535038fe435..f30b605b71d3 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -162,9 +162,8 @@ static int show_partition(struct seq_file *part, void *v) seq_puts(part, "major minor #blocks name\n\n"); /* show the full disk and all non-0 size partitions of it */ - for (n = 0; n < (sgp->nr_real << sgp->minor_shift); n++) { - int minormask = (1<<sgp->minor_shift) - 1; - if ((n & minormask) && sgp->part[n].nr_sects == 0) + for (n = 0; n < 1<<sgp->minor_shift; n++) { + if (n && sgp->part[n].nr_sects == 0) continue; seq_printf(part, "%4d %4d %10ld %s\n", sgp->major, n + sgp->first_minor, diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 6d6b1ab6a7c4..b100a1faf6b8 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -687,7 +687,6 @@ static int pd_detect( void ) PD.gd.major_name = PD.name;
PD.gd.minor_shift = PD_BITS; PD.gd.fops = &pd_fops; - PD.gd.nr_real = 1; PD.gd.major = major; PD.gd.first_minor = unit << PD_BITS; PD.gd.part = pd_hd + (unit << PD_BITS); diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index c1076b2b65d0..0882dd30d940 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -154,7 +154,6 @@ static struct gendisk ps2esdi_gendisk[2] = { minor_shift: 6, part: ps2esdi, fops: &ps2esdi_fops, - nr_real: 1 },{ major: MAJOR_NR, first_minor: 64, @@ -162,7 +161,6 @@ static struct gendisk ps2esdi_gendisk[2] = { minor_shift: 6, part: ps2esdi+64, fops: &ps2esdi_fops, - nr_real: 1 } }; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index b326b6df938b..c1c872a6e358 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -1193,7 +1193,6 @@ int __init mm_init(void) sprintf(mm_names + i*6, "umem%c", 'a'+i); spin_lock_init(&cards[i].lock); disk->part = mm_partitions + (i << MM_SHIFT); - disk->nr_real = 1; disk->major = major_nr; disk->first_minor = i << MM_SHIFT; disk->major_name = mm_names + i*6; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 6dc939a7a055..7f30d2df7506 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -135,7 +135,6 @@ static struct gendisk xd_gendisk[2] = { .minor_shift = 6, .part = xd_struct, .fops = &xd_fops, - .nr_real = 1 },{ .major = MAJOR_NR, .first_minor = 64, @@ -143,7 +142,6 @@ static struct gendisk xd_gendisk[2] = { .minor_shift = 6, .part = xd_struct + 64, .fops = &xd_fops, - .nr_real = 1 } }; diff --git a/drivers/ide/hd.c b/drivers/ide/hd.c index 81624ccfe02f..534983a99346 100644 --- a/drivers/ide/hd.c +++ b/drivers/ide/hd.c @@ -849,7 +849,6 @@ static void __init hd_geninit(void) } for(drive=0; drive < NR_HD; drive++) { - hd_gendisk[drive].nr_real = 1; add_gendisk(hd_gendisk + drive); register_disk(hd_gendisk + drive, mk_kdev(MAJOR_NR,drive<<6), 1<<6, diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1b972998ea2f..cd9b76e89e46 100644 
--- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -861,7 +861,6 @@ static void init_gendisk (ide_hwif_t *hwif) sprintf(names + 4*unit, "hd%c",'a'+hwif->index*MAX_DRIVES+unit); gd[unit].major_name = names + 4*unit; gd[unit].minor_shift = PARTN_BITS; - gd[unit].nr_real = 1; gd[unit].fops = ide_fops; hwif->gd[unit] = gd + unit; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 41b8c222a538..04dc32ea7ce2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1459,7 +1459,6 @@ static int do_md_run(mddev_t * mddev) sprintf(major_name, "md%d", mdidx(mddev)); disk->major_name = major_name; disk->part = md_hd_struct + mdidx(mddev); - disk->nr_real = 1; disk->fops = &md_fops; mddev->pers = pers[pnum]; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 15228d03a9af..a8cdfb80a95a 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -1773,7 +1773,6 @@ int i2o_block_init(void) disk->minor_shift = 4; disk->part = i2ob + (i<<4); disk->fops = &i2ob_fops; - disk->nr_real = 1; disk->major_name = i2o_names + i*8; sprintf(disk->major_name, "i2o/hd%c", 'a' + i); } diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 268f8e363a90..4d50cf22fc58 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1254,7 +1254,6 @@ static void ftl_notify_add(struct mtd_info *mtd) disk->minor_shift = PART_BITS; disk->part = ftl_hd + (device << 4); disk->fops = &ftl_blk_fops; - disk->nr_real = 1; partition->mtd = mtd; partition->disk = disk; diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 2de5c0783fde..27cd8b48ca14 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -152,7 +152,6 @@ static void NFTL_setup(struct mtd_info *mtd) gd->minor_shift = NFTL_PARTN_BITS; gd->part = part_table + (firstfree << NFTL_PARTN_BITS); gd->major_name = name; - gd->nr_real = 1; nftl->disk = gd; add_gendisk(gd); register_disk(gd, mk_kdev(MAJOR_NR,firstfree<major = new_major; disk->first_minor = i << 
DASD_PARTN_BITS; disk->minor_shift = DASD_PARTN_BITS; - disk->nr_real = 1; disk->fops = &dasd_device_operations; disk->de_arr = mi->de_arr + i; disk->flags = mi->flags + i; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8c1e5d9ef3b4..ddff04c6d4b2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1351,7 +1351,6 @@ static int sd_attach(Scsi_Device * sdp) } sd_template.nr_dev++; - gd->nr_real = 1; gd->de_arr[0] = sdp->de; gd->driverfs_dev_arr[0] = &sdp->sdev_driverfs_dev; gd->major = SD_MAJOR(dsk_nr>>4); diff --git a/fs/block_dev.c b/fs/block_dev.c index 56fec1317128..a7bd20a9dd48 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -602,8 +602,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * struct gendisk *g = get_gendisk(dev); bdev->bd_contains = bdev; if (g) { - int shift = g->minor_shift; - unsigned minor0 = (minor >> shift) << shift; + unsigned minor0 = g->first_minor; if (minor != minor0) { struct block_device *disk; disk = bdget(MKDEV(major(dev), minor0)); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 5c3bc6cdd864..6b79a6f10f2c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -89,14 +89,12 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = char *disk_name (struct gendisk *hd, int minor, char *buf) { - unsigned int unit = (minor >> hd->minor_shift); - unsigned int part = (minor & ((1 << hd->minor_shift) -1 )); - struct hd_struct *p = hd->part + minor - hd->first_minor; + unsigned int part = minor - hd->first_minor; + struct hd_struct *p = hd->part + part; char s[40]; const char *maj; - if ((((minor - hd->first_minor) >> hd->minor_shift) < hd->nr_real) && - p->de) { + if (part < 1<<hd->minor_shift && p->de) { int pos; pos = devfs_generate_path(p->de, buf, 64); @@ -104,23 +102,7 @@ char *disk_name (struct gendisk *hd, int minor, char *buf) return buf + pos; } - /* - * Yes, I know, ... in cases is gccism and not a pretty one.
- * However, the first variant will eventually consume _all_ cases - * and switch will disappear. - */ - switch (hd->major) { - default: - maj = hd->major_name; - break; - case MD_MAJOR: - sprintf(s, "%s%d", "md", unit); - maj = s; - break; - case I2O_MAJOR: - sprintf(s, "%s%c", hd->major_name, unit + 'a'); - maj = s; - } + maj = hd->major_name; if (!part) sprintf(buf, "%s", maj); else if (isdigit(maj[strlen(maj)-1])) @@ -150,7 +132,6 @@ static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL); void driverfs_create_partitions(struct gendisk *hd, int minor) { int pos = -1; - int devnum = (minor - hd->first_minor) >> hd->minor_shift; char dirname[256]; struct device *parent = 0; int max_p; @@ -160,13 +141,13 @@ void driverfs_create_partitions(struct gendisk *hd, int minor) /* get parent driverfs device structure */ if (hd->driverfs_dev_arr) - parent = hd->driverfs_dev_arr[devnum]; + parent = hd->driverfs_dev_arr[0]; else /* if driverfs not supported by subsystem, skip partitions */ return; /* get parent device node directory name */ if (hd->de_arr) { - dir = hd->de_arr[devnum]; + dir = hd->de_arr[0]; if (dir) pos = devfs_generate_path (dir, dirname, sizeof dirname); @@ -268,7 +249,7 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) return; if (hd->de_arr) - de = hd->de_arr[(minor(dev)-hd->first_minor)>>hd->minor_shift]; + de = hd->de_arr[0]; i = devfs_generate_path (de, buf, sizeof buf); if (i >= 0) { printk(KERN_INFO " /dev/%s:", buf + i); @@ -315,7 +296,6 @@ out: #ifdef CONFIG_DEVFS_FS static void devfs_register_partition (struct gendisk *dev, int minor, int part) { - int devnum = (minor - dev->first_minor) >> dev->minor_shift; devfs_handle_t dir; unsigned int devfs_flags = DEVFS_FL_DEFAULT; struct hd_struct *p = dev->part + minor - dev->first_minor; @@ -326,7 +306,7 @@ static void devfs_register_partition (struct gendisk *dev, int minor, int part) dir = devfs_get_parent(p[0].de); if (!dir) return; - if ( dev->flags && 
(dev->flags[devnum] & GENHD_FL_REMOVABLE) ) + if ( dev->flags && (dev->flags[0] & GENHD_FL_REMOVABLE) ) devfs_flags |= DEVFS_FL_REMOVABLE; sprintf (devname, "part%d", part); p[part].de = devfs_register (dir, devname, devfs_flags, @@ -340,7 +320,6 @@ static struct unique_numspace disc_numspace = UNIQUE_NUMBERSPACE_INITIALISER; static void devfs_register_disc (struct gendisk *dev, int minor) { int pos = 0; - int devnum = (minor - dev->first_minor) >> dev->minor_shift; devfs_handle_t dir, slave; unsigned int devfs_flags = DEVFS_FL_DEFAULT; char dirname[64], symlink[16]; @@ -349,10 +328,10 @@ static void devfs_register_disc (struct gendisk *dev, int minor) if (p[0].de) return; - if ( dev->flags && (dev->flags[devnum] & GENHD_FL_REMOVABLE) ) + if ( dev->flags && (dev->flags[0] & GENHD_FL_REMOVABLE) ) devfs_flags |= DEVFS_FL_REMOVABLE; if (dev->de_arr) { - dir = dev->de_arr[devnum]; + dir = dev->de_arr[0]; if (!dir) /* Aware driver wants to block disc management */ return; pos = devfs_generate_path(dir, dirname + 3, sizeof dirname-3); @@ -362,7 +341,7 @@ static void devfs_register_disc (struct gendisk *dev, int minor) } else { /* Unaware driver: construct "real" directory */ sprintf(dirname, "../%s/disc%d", dev->major_name, - (dev->first_minor >> dev->minor_shift) + devnum); + dev->first_minor >> dev->minor_shift); dir = devfs_mk_dir(NULL, dirname + 3, NULL); } if (!devfs_handle) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd9f5a8d7c06..304ebe48ec24 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -76,8 +76,6 @@ struct gendisk { get real minor */ struct hd_struct *part; /* [indexed by minor] */ - int nr_real; /* number of real devices */ - struct gendisk *next; struct block_device_operations *fops; -- cgit v1.2.3 From 36bd834b4b5c9e59afc45f863075b4687077ebab Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:36 -0700 Subject: [PATCH] (3/25) Removing useless minor arguments driverfs_remove_partitions(), 
devfs_register_partitions(), driverfs_create_partitions(), devfs_register_partition(), devfs_register_disc(), had lost 'minor' argument - it's always disk->first_minor these days. disk_name() takes partition number instead of minor now. Callers of wipe_partitions() in fs/block_dev.c expanded. Remaining caller passes gendisk instead of kdev_t now. --- drivers/block/blkpg.c | 25 +++++------- drivers/block/genhd.c | 6 +-- drivers/md/md.c | 2 +- drivers/scsi/sd.c | 4 +- fs/block_dev.c | 42 ++++++++++++-------- fs/partitions/check.c | 102 +++++++++++++++++++------------------------------ include/linux/blkdev.h | 2 +- include/linux/genhd.h | 7 ++-- 8 files changed, 85 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index 1d13a84a9919..106df6b4644c 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -71,7 +71,6 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p) long pstart, plength; int i; kdev_t dev = to_kdev_t(bdev->bd_dev); - struct hd_struct *part; /* convert bytes to sectors, check for fit in a hd_struct */ ppstart = (p->start >> 9); @@ -86,7 +85,6 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p) g = get_gendisk(dev); if (!g) return -ENXIO; - part = g->part + minor(dev) - g->first_minor; /* existing drive? */ @@ -97,19 +95,19 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p) return -EINVAL; /* partition number in use? */ - if (part[p->pno].nr_sects != 0) + if (g->part[p->pno].nr_sects != 0) return -EBUSY; /* overlap? 
*/ for (i = 1; i < (1<<g->minor_shift); i++) - if (!(pstart+plength <= part[i].start_sect || - pstart >= part[i].start_sect + part[i].nr_sects)) + if (!(pstart+plength <= g->part[i].start_sect || + pstart >= g->part[i].start_sect + g->part[i].nr_sects)) return -EBUSY; /* all seems OK */ - part[p->pno].start_sect = pstart; - part[p->pno].nr_sects = plength; - devfs_register_partitions (g, minor(dev), 0); + g->part[p->pno].start_sect = pstart; + g->part[p->pno].nr_sects = plength; + devfs_register_partitions (g, 0); return 0; } @@ -128,22 +126,19 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p) kdev_t dev = to_kdev_t(bdev->bd_dev); struct gendisk *g; struct block_device *bdevp; - struct hd_struct *part; int holder; /* find the drive major */ g = get_gendisk(dev); if (!g) return -ENXIO; - part = g->part + minor(dev) - g->first_minor; - if (bdev != bdev->bd_contains) return -EINVAL; if (p->pno <= 0 || p->pno >= (1 << g->minor_shift)) return -EINVAL; /* existing drive and partition? */ - if (part[p->pno].nr_sects == 0) + if (g->part[p->pno].nr_sects == 0) return -ENXIO; /* partition in use? Incomplete check for now.
*/ @@ -159,9 +154,9 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p) fsync_bdev(bdevp); invalidate_bdev(bdevp, 0); - part[p->pno].start_sect = 0; - part[p->pno].nr_sects = 0; - devfs_register_partitions (g, minor(dev), 0); + g->part[p->pno].start_sect = 0; + g->part[p->pno].nr_sects = 0; + devfs_register_partitions (g, 0); bd_release(bdevp); bdput(bdevp); diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index f30b605b71d3..782017d52905 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -81,7 +81,7 @@ del_gendisk(struct gendisk *disk) { struct gendisk **p; - wipe_partitions(mk_kdev(disk->major, disk->first_minor)); + wipe_partitions(disk); write_lock(&gendisk_lock); for (p = &gendisk_head; *p; p = &((*p)->next)) if (*p == disk) @@ -89,7 +89,7 @@ del_gendisk(struct gendisk *disk) if (*p) *p = (*p)->next; write_unlock(&gendisk_lock); - devfs_register_partitions(disk, disk->first_minor, 1); + devfs_register_partitions(disk, 1); } EXPORT_SYMBOL(del_gendisk); @@ -168,7 +168,7 @@ static int show_partition(struct seq_file *part, void *v) seq_printf(part, "%4d %4d %10ld %s\n", sgp->major, n + sgp->first_minor, sgp->part[n].nr_sects >> 1 , - disk_name(sgp, n + sgp->first_minor, buf)); + disk_name(sgp, n, buf)); } return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 04dc32ea7ce2..e50bfe391c01 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -276,7 +276,7 @@ char * partition_name(kdev_t dev) hd = get_gendisk (dev); dname->name = NULL; if (hd) - dname->name = disk_name (hd, minor(dev), dname->namebuf); + dname->name = disk_name(hd, minor(dev)-hd->first_minor, dname->namebuf); if (!dname->name) { sprintf (dname->namebuf, "[dev %s]", kdevname(dev)); dname->name = dname->namebuf; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ddff04c6d4b2..b8cdd54caaae 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -126,8 +126,6 @@ static void sd_rw_intr(Scsi_Cmnd * SCpnt); static Scsi_Disk * sd_get_sdisk(int 
index); -extern void driverfs_remove_partitions(struct gendisk *hd, int minor); - #if defined(CONFIG_PPC32) /** * sd_find_target - find kdev_t of first scsi disk that matches @@ -1424,7 +1422,7 @@ static void sd_detach(Scsi_Device * sdp) if (sdkp->has_been_registered) { sdkp->has_been_registered = 0; dev = MKDEV_SD(dsk_nr); - driverfs_remove_partitions(sd_disks[dsk_nr], minor(dev)); + driverfs_remove_partitions(sd_disks[dsk_nr]); del_gendisk(sd_disks[dsk_nr]); } sdp->attached--; diff --git a/fs/block_dev.c b/fs/block_dev.c index a7bd20a9dd48..637c6f0f387d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -528,16 +528,21 @@ int check_disk_change(struct block_device *bdev) int full_check_disk_change(struct block_device *bdev) { int res; + if (bdev->bd_contains != bdev) + BUG(); down(&bdev->bd_sem); res = check_disk_change(bdev); if (bdev->bd_invalidated && !bdev->bd_part_count) { - struct gendisk *g = get_gendisk(to_kdev_t(bdev->bd_dev)); - struct hd_struct *part; - part = g->part + MINOR(bdev->bd_dev) - g->first_minor; + struct gendisk *disk = get_gendisk(to_kdev_t(bdev->bd_dev)); + int p; bdev->bd_invalidated = 0; - wipe_partitions(to_kdev_t(bdev->bd_dev)); - if (part[0].nr_sects) - check_partition(g, bdev); + for (p = 1; p < 1<<disk->minor_shift; p++) { + disk->part[p].start_sect = 0; + disk->part[p].nr_sects = 0; + } + res = invalidate_device(to_kdev_t(bdev->bd_dev), 1); + if (disk->part[0].nr_sects) + check_partition(disk, bdev); } up(&bdev->bd_sem); return res; @@ -650,11 +655,14 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * bdev->bd_inode->i_data.backing_dev_info = bdi; } if (bdev->bd_invalidated && !bdev->bd_part_count) { - struct hd_struct *part; - part = g->part + minor(dev) - g->first_minor; + int p; bdev->bd_invalidated = 0; - wipe_partitions(dev); - if (part[0].nr_sects) + for (p = 1; p < 1<<g->minor_shift; p++) { + g->part[p].start_sect = 0; + g->part[p].nr_sects = 0; + } + invalidate_device(dev, 1); + if
(g->part[0].nr_sects) check_partition(g, bdev); } } else { @@ -791,12 +799,10 @@ static int blkdev_reread_part(struct block_device *bdev) { kdev_t dev = to_kdev_t(bdev->bd_dev); struct gendisk *disk = get_gendisk(dev); - struct hd_struct *part; - int res; + int p, res; - if (!disk || !disk->minor_shift) + if (!disk || !disk->minor_shift || bdev != bdev->bd_contains) return -EINVAL; - part = disk->part + minor(dev) - disk->first_minor; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (down_trylock(&bdev->bd_sem)) @@ -805,11 +811,15 @@ static int blkdev_reread_part(struct block_device *bdev) up(&bdev->bd_sem); return -EBUSY; } - res = wipe_partitions(dev); + for (p = 1; p < 1 << disk->minor_shift; p++) { + disk->part[p].start_sect = 0; + disk->part[p].nr_sects = 0; + } + res = invalidate_device(dev, 1); if (!res) { if (bdev->bd_op->revalidate) bdev->bd_op->revalidate(dev); - if (part[0].nr_sects) + if (disk->part[0].nr_sects) check_partition(disk, bdev); } up(&bdev->bd_sem); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6b79a6f10f2c..6dd2bb455c27 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -87,28 +87,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = * a pointer to that same buffer (for convenience). 
*/ -char *disk_name (struct gendisk *hd, int minor, char *buf) +char *disk_name (struct gendisk *hd, int part, char *buf) { - unsigned int part = minor - hd->first_minor; - struct hd_struct *p = hd->part + part; - char s[40]; - const char *maj; - - if (part < 1<<hd->minor_shift && p->de) { + if (part < 1<<hd->minor_shift && hd->part[part].de) { int pos; - pos = devfs_generate_path(p->de, buf, 64); + pos = devfs_generate_path(hd->part[part].de, buf, 64); if (pos >= 0) return buf + pos; } - - maj = hd->major_name; if (!part) - sprintf(buf, "%s", maj); - else if (isdigit(maj[strlen(maj)-1])) - sprintf(buf, "%sp%d", maj, part); + sprintf(buf, "%s", hd->major_name); + else if (isdigit(hd->major_name[strlen(hd->major_name)-1])) + sprintf(buf, "%sp%d", hd->major_name, part); else - sprintf(buf, "%s%d", maj, part); + sprintf(buf, "%s%d", hd->major_name, part); return buf; } @@ -129,7 +122,7 @@ static ssize_t partition_device_type_read(struct device *driverfs_dev, } static DEVICE_ATTR(type,S_IRUGO,partition_device_type_read,NULL); -void driverfs_create_partitions(struct gendisk *hd, int minor) +static void driverfs_create_partitions(struct gendisk *hd) { int pos = -1; char dirname[256]; @@ -137,7 +130,7 @@ void driverfs_create_partitions(struct gendisk *hd, int minor) int max_p; int part; devfs_handle_t dir = 0; - struct hd_struct *p = hd->part + minor - hd->first_minor; + struct hd_struct *p = hd->part; /* get parent driverfs device structure */ if (hd->driverfs_dev_arr) @@ -154,7 +147,7 @@ void driverfs_create_partitions(struct gendisk *hd, int minor) } if (pos < 0) { - disk_name(hd, minor, dirname); + disk_name(hd, 0, dirname); pos = 0; } @@ -168,7 +161,7 @@ void driverfs_create_partitions(struct gendisk *hd, int minor) current_driverfs_dev->parent = parent; /* handle disc case */ current_driverfs_dev->driver_data = - (void *)(long)__mkdev(hd->major, minor+part); + (void *)(long)__mkdev(hd->major, hd->first_minor+part); if (part == 0) { if (parent) {
sprintf(current_driverfs_dev->name, @@ -204,12 +197,12 @@ void driverfs_create_partitions(struct gendisk *hd, int minor) } } -void driverfs_remove_partitions(struct gendisk *hd, int minor) +void driverfs_remove_partitions(struct gendisk *hd) { int max_p; int part; struct device * current_driverfs_dev; - struct hd_struct *p = hd->part + minor - hd->first_minor; + struct hd_struct *p = hd->part; max_p=(1 << hd->minor_shift); @@ -225,10 +218,8 @@ void driverfs_remove_partitions(struct gendisk *hd, int minor) } } current_driverfs_dev = &p->hd_driverfs_dev; - device_remove_file(current_driverfs_dev, - &dev_attr_type); - device_remove_file(current_driverfs_dev, - &dev_attr_kdev); + device_remove_file(current_driverfs_dev, &dev_attr_type); + device_remove_file(current_driverfs_dev, &dev_attr_kdev); put_device(current_driverfs_dev); return; } @@ -256,7 +247,7 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) sprintf(state->name, "p"); } else { unsigned n = hd->major; - disk_name(hd, minor(dev), state->name); + disk_name(hd, 0, state->name); printk(KERN_INFO " %s:", state->name); if (n - COMPAQ_SMART2_MAJOR <= 7 || n - COMPAQ_CISS_MAJOR <= 7) sprintf(state->name, "p"); @@ -274,7 +265,7 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) printk(" unable to read partition table\n"); goto out; } - p = hd->part + minor(dev) - hd->first_minor; + p = hd->part; for (j = 1; j < state->limit; j++) { p[j].start_sect = state->parts[j].from; p[j].nr_sects = state->parts[j].size; @@ -289,16 +280,16 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) printk(" unknown partition table\n"); out: - driverfs_create_partitions(hd, minor(dev)); - devfs_register_partitions (hd, minor(dev), 0); + driverfs_create_partitions(hd); + devfs_register_partitions(hd, 0); } #ifdef CONFIG_DEVFS_FS -static void devfs_register_partition (struct gendisk *dev, int minor, int part) +static void devfs_register_partition(struct gendisk *dev, int part) { 
devfs_handle_t dir; unsigned int devfs_flags = DEVFS_FL_DEFAULT; - struct hd_struct *p = dev->part + minor - dev->first_minor; + struct hd_struct *p = dev->part; char devname[16]; if (p[part].de) @@ -310,21 +301,21 @@ static void devfs_register_partition (struct gendisk *dev, int minor, int part) devfs_flags |= DEVFS_FL_REMOVABLE; sprintf (devname, "part%d", part); p[part].de = devfs_register (dir, devname, devfs_flags, - dev->major, minor + part, + dev->major, dev->first_minor + part, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); } static struct unique_numspace disc_numspace = UNIQUE_NUMBERSPACE_INITIALISER; -static void devfs_register_disc (struct gendisk *dev, int minor) +static void devfs_register_disc(struct gendisk *dev) { int pos = 0; devfs_handle_t dir, slave; unsigned int devfs_flags = DEVFS_FL_DEFAULT; char dirname[64], symlink[16]; static devfs_handle_t devfs_handle; - struct hd_struct *p = dev->part + minor - dev->first_minor; + struct hd_struct *p = dev->part; if (p[0].de) return; @@ -350,7 +341,8 @@ static void devfs_register_disc (struct gendisk *dev, int minor) sprintf(symlink, "disc%d", p[0].number); devfs_mk_symlink (devfs_handle, symlink, DEVFS_FL_DEFAULT, dirname + pos, &slave, NULL); - p[0].de = devfs_register (dir, "disc", devfs_flags, dev->major, minor, + p[0].de = devfs_register (dir, "disc", devfs_flags, + dev->major, dev->first_minor, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); devfs_auto_unregister(p[0].de, slave); if (!dev->de_arr) @@ -358,14 +350,14 @@ static void devfs_register_disc (struct gendisk *dev, int minor) } #endif /* CONFIG_DEVFS_FS */ -void devfs_register_partitions (struct gendisk *dev, int minor, int unregister) +void devfs_register_partitions (struct gendisk *dev, int unregister) { #ifdef CONFIG_DEVFS_FS int part, max_p; - struct hd_struct *p = dev->part + minor - dev->first_minor; + struct hd_struct *p = dev->part; if (!unregister) - devfs_register_disc (dev, minor); + devfs_register_disc(dev); max_p = (1 << 
dev->minor_shift); for (part = 1; part < max_p; part++) { if ( unregister || (p[part].nr_sects < 1) ) { @@ -373,7 +365,7 @@ void devfs_register_partitions (struct gendisk *dev, int minor, int unregister) p[part].de = NULL; continue; } - devfs_register_partition (dev, minor, part); + devfs_register_partition(dev, part); } if (unregister) { devfs_unregister(p[0].de); @@ -402,7 +394,7 @@ void register_disk(struct gendisk *g, kdev_t dev, unsigned minors, if (!g) return; - p = g->part + minor(dev) - g->first_minor; + p = g->part; p[0].nr_sects = size; /* No minors to use for partitions */ @@ -443,38 +435,24 @@ fail: return NULL; } -int wipe_partitions(kdev_t dev) +int wipe_partitions(struct gendisk *disk) { - struct gendisk *g; - kdev_t devp; - int p, major, minor, minor0, max_p, res; - struct hd_struct *part; - - g = get_gendisk(dev); - if (g == NULL) - return -EINVAL; - - max_p = 1 << g->minor_shift; - major = major(dev); - minor = minor(dev); - minor0 = minor & ~(max_p - 1); - if (minor0 != minor) /* for now only whole-disk reread */ - return -EINVAL; /* %%% later.. */ - - part = g->part + minor - g->first_minor; + int max_p = 1 << disk->minor_shift; + int p; + /* invalidate stuff */ for (p = max_p - 1; p >= 0; p--) { - minor = minor0 + p; - devp = mk_kdev(major,minor); + kdev_t devp = mk_kdev(disk->major,disk->first_minor + p); + int res; #if 0 /* %%% superfluous? 
*/ - if (part[p].nr_sects == 0) + if (disk->part[p].nr_sects == 0) continue; #endif res = invalidate_device(devp, 1); if (res) return res; - part[p].start_sect = 0; - part[p].nr_sects = 0; + disk->part[p].start_sect = 0; + disk->part[p].nr_sects = 0; } return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0fd1bbd5520..77fec95ff4b4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -281,7 +281,7 @@ struct sec_size { extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern int wipe_partitions(kdev_t dev); +extern int wipe_partitions(struct gendisk *disk); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void check_partition(struct gendisk *disk, struct block_device *bdev); extern void generic_make_request(struct bio *bio); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 304ebe48ec24..dcff86342667 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -240,11 +240,10 @@ struct unixware_disklabel { #ifdef __KERNEL__ -char *disk_name (struct gendisk *hd, int minor, char *buf); +char *disk_name (struct gendisk *hd, int part, char *buf); -extern void devfs_register_partitions (struct gendisk *dev, int minor, - int unregister); -extern void driverfs_remove_partitions (struct gendisk *hd, int minor); +extern void devfs_register_partitions (struct gendisk *dev, int unregister); +extern void driverfs_remove_partitions (struct gendisk *hd); static inline unsigned int disk_index (kdev_t dev) { -- cgit v1.2.3 From ab3bfaa2cca8ffcc9df56e41ebd82ba2837f040c Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:44 -0700 Subject: [PATCH] (5/25) Removing bogus arrays - ->flags[] Seeing that now disk->flags[] always consists of one element, we replace char *flags with int flags, remove the junk from places that used to allocate these "arrays" and do obvious updates 
of the code (s/->flags[0]/->flags/). --- drivers/ide/ide-cd.c | 3 +++ drivers/ide/ide-disk.c | 13 ++----------- drivers/ide/ide-floppy.c | 12 ++---------- drivers/ide/ide-probe.c | 9 --------- drivers/ide/ide.c | 2 -- drivers/s390/block/dasd_genhd.c | 2 -- drivers/scsi/sd.c | 5 +---- fs/partitions/check.c | 4 ++-- include/linux/genhd.h | 2 +- 9 files changed, 11 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d6e7db8b7887..860f2436179d 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -3147,6 +3147,9 @@ static int ide_cdrom_reinit (ide_drive_t *drive) cdrom_read_toc(drive, &sense); g->minor_shift = 0; + /* probably bogus, but that's the old behaviour */ + g->de_arr[0] = NULL; + g->flags = 0; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<<g->minor_shift, ide_fops, diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index ce8e1a9e57b9..5fb72ddaf952 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1516,8 +1516,6 @@ static void idedisk_add_settings(ide_drive_t *drive) static void idedisk_setup (ide_drive_t *drive) { - int i; - struct hd_driveid *id = drive->id; unsigned long capacity; @@ -1538,15 +1536,6 @@ static void idedisk_setup (ide_drive_t *drive) drive->doorlocking = 1; } } - for (i = 0; i < MAX_DRIVES; ++i) { - ide_hwif_t *hwif = HWIF(drive); - - if (drive != &hwif->drives[i]) continue; - hwif->gd[i]->de_arr[i] = drive->de; - if (drive->removable) - hwif->gd[i]->flags[i] |= GENHD_FL_REMOVABLE; - break; - } #if 1 (void) probe_lba_addressing(drive, 1); @@ -1714,6 +1703,8 @@ static int idedisk_reinit(ide_drive_t *drive) } DRIVER(drive)->busy--; g->minor_shift = PARTN_BITS; + g->de_arr[0] = drive->de; + g->flags = drive->removable ?
GENHD_FL_REMOVABLE : 0; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 24ef0f8ba412..94f57d4b5b9f 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -2065,7 +2065,6 @@ static void idefloppy_add_settings(ide_drive_t *drive) static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) { struct idefloppy_id_gcw gcw; - int i; *((unsigned short *) &gcw) = drive->id->config; drive->driver_data = floppy; @@ -2108,15 +2107,6 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); - for (i = 0; i < MAX_DRIVES; ++i) { - ide_hwif_t *hwif = HWIF(drive); - - if (drive != &hwif->drives[i]) continue; - hwif->gd[i]->de_arr[i] = drive->de; - if (drive->removable) - hwif->gd[i]->flags[i] |= GENHD_FL_REMOVABLE; - break; - } } static int idefloppy_cleanup (ide_drive_t *drive) @@ -2221,6 +2211,8 @@ static int idefloppy_reinit (ide_drive_t *drive) idefloppy_setup (drive, floppy); DRIVER(drive)->busy--; g->minor_shift = PARTN_BITS; + g->de_arr[0] = drive->de; + g->flags = drive->removable ? 
GENHD_FL_REMOVABLE : 0; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index cd9b76e89e46..9fc3488c111b 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -809,7 +809,6 @@ static void init_gendisk (ide_hwif_t *hwif) struct gendisk *gd; struct hd_struct *part; devfs_handle_t *de_arr; - char *flags; unsigned int unit, units, minors; extern devfs_handle_t ide_devfs_handle; char *names; @@ -841,11 +840,6 @@ static void init_gendisk (ide_hwif_t *hwif) goto err_kmalloc_gd_de_arr; memset(de_arr, 0, sizeof(devfs_handle_t) * MAX_DRIVES); - flags = kmalloc(sizeof(char) * MAX_DRIVES, GFP_KERNEL); - if (!flags) - goto err_kmalloc_gd_flags; - memset(flags, 0, sizeof(char) * MAX_DRIVES); - names = kmalloc (4 * MAX_DRIVES, GFP_KERNEL); if (!names) goto err_kmalloc_gd_names; @@ -854,7 +848,6 @@ static void init_gendisk (ide_hwif_t *hwif) for (unit = 0; unit < units; ++unit) { gd[unit].part = part + (unit << PARTN_BITS); gd[unit].de_arr = de_arr + unit; - gd[unit].flags = flags + unit; hwif->drives[unit].part = gd[unit].part; gd[unit].major = hwif->major; gd[unit].first_minor = unit << PARTN_BITS; @@ -891,8 +884,6 @@ static void init_gendisk (ide_hwif_t *hwif) return; err_kmalloc_gd_names: - kfree(names); -err_kmalloc_gd_flags: kfree(de_arr); err_kmalloc_gd_de_arr: kfree(part); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index c81cbf3ae916..7c1ed4dd8f25 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -2064,8 +2064,6 @@ void ide_unregister (unsigned int index) kfree(gd->part); if (gd->de_arr) kfree (gd->de_arr); - if (gd->flags) - kfree (gd->flags); kfree(gd); for (i = 0; i < MAX_DRIVES; i++) hwif->gd[i] = NULL; diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 056a917799a8..82d060ba4026 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -35,7 +35,6 @@ struct major_info { 
int major; struct gendisk disks[DASD_PER_MAJOR]; devfs_handle_t de_arr[DASD_PER_MAJOR]; - char flags[DASD_PER_MAJOR]; char names[DASD_PER_MAJOR * 8]; struct hd_struct part[1<minor_shift = DASD_PARTN_BITS; disk->fops = &dasd_device_operations; disk->de_arr = mi->de_arr + i; - disk->flags = mi->flags + i; disk->part = mi->part + (i << DASD_PARTN_BITS); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b4edc9266a48..b1686243dccb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1302,7 +1302,6 @@ static int sd_attach(Scsi_Device * sdp) struct gendisk disk; devfs_handle_t de; struct device *dev; - char flags; char name[5]; } *p; struct gendisk *gd; @@ -1316,7 +1315,6 @@ static int sd_attach(Scsi_Device * sdp) return 1; gd = &p->disk; gd->de_arr = &p->de; - gd->flags = &p->flags; gd->driverfs_dev_arr = &p->dev; SCSI_LOG_HLQUEUE(3, printk("sd_attach: scsi device: <%d,%d,%d,%d>\n", @@ -1361,8 +1359,7 @@ static int sd_attach(Scsi_Device * sdp) else sprintf(p->name, "sd%c", 'a'+dsk_nr%26); gd->major_name = p->name; - if (sdp->removable) - gd->flags[0] |= GENHD_FL_REMOVABLE; + gd->flags = sdp->removable ? 
GENHD_FL_REMOVABLE : 0; sd_disks[dsk_nr] = gd; sd_dskname(dsk_nr, diskname); printk(KERN_NOTICE "Attached scsi %sdisk %s at scsi%d, channel %d, " diff --git a/fs/partitions/check.c b/fs/partitions/check.c index fa248882183b..db8d2e8269a3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -260,7 +260,7 @@ static void devfs_register_partition(struct gendisk *dev, int part) dir = devfs_get_parent(p[0].de); if (!dir) return; - if ( dev->flags && (dev->flags[0] & GENHD_FL_REMOVABLE) ) + if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; sprintf (devname, "part%d", part); p[part].de = devfs_register (dir, devname, devfs_flags, @@ -282,7 +282,7 @@ static void devfs_register_disc(struct gendisk *dev) if (p[0].de) return; - if ( dev->flags && (dev->flags[0] & GENHD_FL_REMOVABLE) ) + if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; if (dev->de_arr) { dir = dev->de_arr[0]; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index dcff86342667..aa2f694ceb1a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -81,7 +81,7 @@ struct gendisk { devfs_handle_t *de_arr; /* one per physical disc */ struct device **driverfs_dev_arr;/* support driverfs hierarchy */ - char *flags; /* one per physical disc */ + int flags; }; /* drivers/block/genhd.c */ -- cgit v1.2.3 From c5f45a700c4f2041ae99ed824c70c870b52e3f33 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:49 -0700 Subject: [PATCH] (6/25) Removing bogus arrays - ->driverfs_dev_arr[] disk->driverfs_dev_arr is either NULL or consists of exactly one element. Same change as above (struct device ** -> struct device *); old "is the pointer to array itself NULL or not?" replaced with a flag (in disk->flags). 
--- drivers/scsi/sd.c | 5 ++--- fs/partitions/check.c | 8 ++++---- include/linux/genhd.h | 3 ++- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b1686243dccb..79deae0ef3f9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1301,7 +1301,6 @@ static int sd_attach(Scsi_Device * sdp) struct { struct gendisk disk; devfs_handle_t de; - struct device *dev; char name[5]; } *p; struct gendisk *gd; @@ -1315,7 +1314,6 @@ static int sd_attach(Scsi_Device * sdp) return 1; gd = &p->disk; gd->de_arr = &p->de; - gd->driverfs_dev_arr = &p->dev; SCSI_LOG_HLQUEUE(3, printk("sd_attach: scsi device: <%d,%d,%d,%d>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun)); @@ -1348,7 +1346,6 @@ static int sd_attach(Scsi_Device * sdp) sd_template.nr_dev++; gd->de_arr[0] = sdp->de; - gd->driverfs_dev_arr[0] = &sdp->sdev_driverfs_dev; gd->major = SD_MAJOR(dsk_nr>>4); gd->first_minor = (dsk_nr & 15)<<4; gd->minor_shift = 4; @@ -1360,6 +1357,8 @@ static int sd_attach(Scsi_Device * sdp) sprintf(p->name, "sd%c", 'a'+dsk_nr%26); gd->major_name = p->name; gd->flags = sdp->removable ? 
GENHD_FL_REMOVABLE : 0; + gd->driverfs_dev = &sdp->sdev_driverfs_dev; + gd->flags |= GENHD_FL_DRIVERFS; sd_disks[dsk_nr] = gd; sd_dskname(dsk_nr, diskname); printk(KERN_NOTICE "Attached scsi %sdisk %s at scsi%d, channel %d, " diff --git a/fs/partitions/check.c b/fs/partitions/check.c index db8d2e8269a3..ce9ace5e4ebe 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -131,12 +131,12 @@ static void driverfs_create_partitions(struct gendisk *hd) struct device *dev, *parent; int part; - /* get parent driverfs device structure */ - if (hd->driverfs_dev_arr) - parent = hd->driverfs_dev_arr[0]; - else /* if driverfs not supported by subsystem, skip partitions */ + /* if driverfs not supported by subsystem, skip partitions */ + if (!(hd->flags & GENHD_FL_DRIVERFS)) return; + parent = hd->driverfs_dev; + if (parent) { sprintf(name, "%s", parent->name); sprintf(bus_id, "%s:", parent->bus_id); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index aa2f694ceb1a..5d82071b483c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,6 +67,7 @@ struct hd_struct { }; #define GENHD_FL_REMOVABLE 1 +#define GENHD_FL_DRIVERFS 2 struct gendisk { int major; /* major number of driver */ @@ -80,7 +81,7 @@ struct gendisk { struct block_device_operations *fops; devfs_handle_t *de_arr; /* one per physical disc */ - struct device **driverfs_dev_arr;/* support driverfs hierarchy */ + struct device *driverfs_dev; int flags; }; -- cgit v1.2.3 From db09b5fc07571729c8710696b96f708370dde976 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:53 -0700 Subject: [PATCH] (7/25) Removing bogus arrays - ->part[].number Each hd_struct used to have int number; in it. It's used _only_ in disk->part[0] - disk->part[n].number is never assigned/checked for any positive n. Moved from hd_struct to gendisk (disk->part[0].number to disk->number). 
--- fs/partitions/check.c | 6 +++--- include/linux/genhd.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index ce9ace5e4ebe..30f35cc619ae 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -300,8 +300,8 @@ static void devfs_register_disc(struct gendisk *dev) } if (!devfs_handle) devfs_handle = devfs_mk_dir(NULL, "discs", NULL); - p[0].number = devfs_alloc_unique_number (&disc_numspace); - sprintf(symlink, "disc%d", p[0].number); + dev->number = devfs_alloc_unique_number (&disc_numspace); + sprintf(symlink, "disc%d", dev->number); devfs_mk_symlink (devfs_handle, symlink, DEVFS_FL_DEFAULT, dirname + pos, &slave, NULL); p[0].de = devfs_register (dir, "disc", devfs_flags, @@ -333,7 +333,7 @@ void devfs_register_partitions (struct gendisk *dev, int unregister) if (unregister) { devfs_unregister(p[0].de); p[0].de = NULL; - devfs_dealloc_unique_number(&disc_numspace, p[0].number); + devfs_dealloc_unique_number(&disc_numspace, dev->number); } #endif /* CONFIG_DEVFS_FS */ } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5d82071b483c..7ac93efcf99a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -62,7 +62,6 @@ struct hd_struct { unsigned long start_sect; unsigned long nr_sects; devfs_handle_t de; /* primary (master) devfs entry */ - int number; /* stupid old code wastes space */ struct device hd_driverfs_dev; /* support driverfs hiearchy */ }; @@ -83,6 +82,7 @@ struct gendisk { devfs_handle_t *de_arr; /* one per physical disc */ struct device *driverfs_dev; int flags; + int number; /* devfs crap */ }; /* drivers/block/genhd.c */ -- cgit v1.2.3 From 06f55b095f81fd6e2113911d2e983d63c6f0ac86 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:03:58 -0700 Subject: [PATCH] (8/25) Removing bogus arrays - ->de_arr[] similar to ->flags and ->driverfs_dev_arr, ->de_arr[] got replaced with its (single) element + flag. 
--- drivers/block/cpqarray.c | 13 +++++-------- drivers/ide/ide-cd.c | 4 ++-- drivers/ide/ide-disk.c | 3 ++- drivers/ide/ide-floppy.c | 3 ++- drivers/ide/ide-probe.c | 9 --------- drivers/ide/ide.c | 2 -- drivers/s390/block/dasd.c | 4 ++-- drivers/s390/block/dasd_genhd.c | 3 +-- drivers/scsi/sd.c | 6 ++---- fs/partitions/check.c | 10 +++++----- include/linux/genhd.h | 3 ++- 11 files changed, 23 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 396a114b4e24..658f201efd0f 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -75,7 +75,6 @@ MODULE_LICENSE("GPL"); static int nr_ctlr; static ctlr_info_t *hba[MAX_CTLR]; -static devfs_handle_t de_arr[MAX_CTLR][NWD]; static int eisa[8]; @@ -451,7 +450,7 @@ int __init cpqarray_init(void) disk->first_minor = j<minor_shift = NWD_SHIFT; disk->part = ida + i*256 + (j<de_arr = &de_arr[i][j]; + disk->flags = GENHD_FL_DEVFS; disk->fops = &ida_fops; if (!drv->nr_blks) continue; @@ -1665,6 +1664,7 @@ static void getgeometry(int ctlr) (log_index < id_ctlr_buf->nr_drvs) && (log_unit < NWD); log_unit++) { + struct gendisk *disk = ida_gendisk + ctlr * NWD + log_unit; size = sizeof(sense_log_drv_stat_t); @@ -1729,13 +1729,10 @@ static void getgeometry(int ctlr) return; } - if (!de_arr[ctlr][log_unit]) { + if (!disk->de) { char txt[16]; - - sprintf(txt, "ida/c%dd%d", ctlr, - log_unit); - de_arr[ctlr][log_unit] = - devfs_mk_dir(NULL, txt, NULL); + sprintf(txt,"ida/c%dd%d",ctlr,log_unit); + disk->de = devfs_mk_dir(NULL,txt,NULL); } info_p->phys_drives = sense_config_buf->ctlr_phys_drv; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 860f2436179d..216ec47a2b32 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -3148,8 +3148,8 @@ static int ide_cdrom_reinit (ide_drive_t *drive) cdrom_read_toc(drive, &sense); g->minor_shift = 0; /* probably bogus, but that's the old behaviour */ - g->de_arr[0] = NULL; - g->flags = 0; + 
g->de = NULL; + g->flags = GENHD_FL_DEVFS; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 5fb72ddaf952..144c0bb95155 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1703,8 +1703,9 @@ static int idedisk_reinit(ide_drive_t *drive) } DRIVER(drive)->busy--; g->minor_shift = PARTN_BITS; - g->de_arr[0] = drive->de; + g->de = drive->de; g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; + g->flags |= GENHD_FL_DEVFS; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 94f57d4b5b9f..5e2ca16310fa 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -2211,8 +2211,9 @@ static int idefloppy_reinit (ide_drive_t *drive) idefloppy_setup (drive, floppy); DRIVER(drive)->busy--; g->minor_shift = PARTN_BITS; - g->de_arr[0] = drive->de; + g->de = drive->de; g->flags = drive->removable ? 
GENHD_FL_REMOVABLE : 0; + g->flags |= GENHD_FL_DEVFS; add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 9fc3488c111b..50e98a9e6066 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -808,7 +808,6 @@ static void init_gendisk (ide_hwif_t *hwif) { struct gendisk *gd; struct hd_struct *part; - devfs_handle_t *de_arr; unsigned int unit, units, minors; extern devfs_handle_t ide_devfs_handle; char *names; @@ -835,11 +834,6 @@ static void init_gendisk (ide_hwif_t *hwif) memset(part, 0, minors * sizeof(struct hd_struct)); - de_arr = kmalloc(sizeof(devfs_handle_t) * MAX_DRIVES, GFP_KERNEL); - if (!de_arr) - goto err_kmalloc_gd_de_arr; - memset(de_arr, 0, sizeof(devfs_handle_t) * MAX_DRIVES); - names = kmalloc (4 * MAX_DRIVES, GFP_KERNEL); if (!names) goto err_kmalloc_gd_names; @@ -847,7 +841,6 @@ static void init_gendisk (ide_hwif_t *hwif) for (unit = 0; unit < units; ++unit) { gd[unit].part = part + (unit << PARTN_BITS); - gd[unit].de_arr = de_arr + unit; hwif->drives[unit].part = gd[unit].part; gd[unit].major = hwif->major; gd[unit].first_minor = unit << PARTN_BITS; @@ -884,8 +877,6 @@ static void init_gendisk (ide_hwif_t *hwif) return; err_kmalloc_gd_names: - kfree(de_arr); -err_kmalloc_gd_de_arr: kfree(part); err_kmalloc_gd_part: kfree(gd); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 7c1ed4dd8f25..08950237c5d6 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -2062,8 +2062,6 @@ void ide_unregister (unsigned int index) if (gd) { int i; kfree(gd->part); - if (gd->de_arr) - kfree (gd->de_arr); kfree(gd); for (i = 0; i < MAX_DRIVES; i++) hwif->gd[i] = NULL; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8f7670f944ed..422e6e6e9d3c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -302,7 +302,7 @@ dasd_state_new_to_known(dasd_device_t *device) /* Add a proc directory and the dasd 
device entry to devfs. */ sprintf(buffer, "%04x", device->devinfo.devno); dir = devfs_mk_dir(dasd_devfs_handle, buffer, device); - gdp->de_arr[0] = dir; + gdp->de = dir; if (devmap->features & DASD_FEATURE_READONLY) devfs_perm = S_IFBLK | S_IRUSR; else @@ -328,7 +328,7 @@ dasd_state_known_to_new(dasd_device_t * device) return; /* Remove device entry and devfs directory. */ devfs_unregister(device->devfs_entry); - devfs_unregister(gdp->de_arr[0]); + devfs_unregister(gdp->de); /* Forget the discipline information. */ device->discipline = NULL; diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 82d060ba4026..700285728e59 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -34,7 +34,6 @@ struct major_info { struct list_head list; int major; struct gendisk disks[DASD_PER_MAJOR]; - devfs_handle_t de_arr[DASD_PER_MAJOR]; char names[DASD_PER_MAJOR * 8]; struct hd_struct part[1<first_minor = i << DASD_PARTN_BITS; disk->minor_shift = DASD_PARTN_BITS; disk->fops = &dasd_device_operations; - disk->de_arr = mi->de_arr + i; + disk->flags = GENHD_FL_DEVFS; disk->part = mi->part + (i << DASD_PARTN_BITS); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 79deae0ef3f9..39112441f969 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1300,7 +1300,6 @@ static int sd_attach(Scsi_Device * sdp) unsigned long iflags; struct { struct gendisk disk; - devfs_handle_t de; char name[5]; } *p; struct gendisk *gd; @@ -1313,7 +1312,6 @@ static int sd_attach(Scsi_Device * sdp) if (!p) return 1; gd = &p->disk; - gd->de_arr = &p->de; SCSI_LOG_HLQUEUE(3, printk("sd_attach: scsi device: <%d,%d,%d,%d>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun)); @@ -1345,7 +1343,7 @@ static int sd_attach(Scsi_Device * sdp) } sd_template.nr_dev++; - gd->de_arr[0] = sdp->de; + gd->de = sdp->de; gd->major = SD_MAJOR(dsk_nr>>4); gd->first_minor = (dsk_nr & 15)<<4; gd->minor_shift = 4; @@ -1358,7 +1356,7 @@ static int 
sd_attach(Scsi_Device * sdp) gd->major_name = p->name; gd->flags = sdp->removable ? GENHD_FL_REMOVABLE : 0; gd->driverfs_dev = &sdp->sdev_driverfs_dev; - gd->flags |= GENHD_FL_DRIVERFS; + gd->flags |= GENHD_FL_DRIVERFS | GENHD_FL_DEVFS; sd_disks[dsk_nr] = gd; sd_dskname(dsk_nr, diskname); printk(KERN_NOTICE "Attached scsi %sdisk %s at scsi%d, channel %d, " diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 30f35cc619ae..0401913788df 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -203,8 +203,8 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) if (!state) return; - if (hd->de_arr) - de = hd->de_arr[0]; + if (hd->flags & GENHD_FL_DEVFS) + de = hd->de; i = devfs_generate_path (de, buf, sizeof buf); if (i >= 0) { printk(KERN_INFO " /dev/%s:", buf + i); @@ -284,8 +284,8 @@ static void devfs_register_disc(struct gendisk *dev) return; if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; - if (dev->de_arr) { - dir = dev->de_arr[0]; + if (dev->flags & GENHD_FL_DEVFS) { + dir = dev->de; if (!dir) /* Aware driver wants to block disc management */ return; pos = devfs_generate_path(dir, dirname + 3, sizeof dirname-3); @@ -308,7 +308,7 @@ static void devfs_register_disc(struct gendisk *dev) dev->major, dev->first_minor, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); devfs_auto_unregister(p[0].de, slave); - if (!dev->de_arr) + if (!(dev->flags & GENHD_FL_DEVFS)) devfs_auto_unregister (slave, dir); } #endif /* CONFIG_DEVFS_FS */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7ac93efcf99a..ee2d41d8bd4d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,6 +67,7 @@ struct hd_struct { #define GENHD_FL_REMOVABLE 1 #define GENHD_FL_DRIVERFS 2 +#define GENHD_FL_DEVFS 4 struct gendisk { int major; /* major number of driver */ @@ -79,7 +80,7 @@ struct gendisk { struct gendisk *next; struct block_device_operations *fops; - devfs_handle_t *de_arr; /* one per physical disc */ + 
devfs_handle_t de; struct device *driverfs_dev; int flags; int number; /* devfs crap */ -- cgit v1.2.3 From 897c924ed245812b0fafc96da5b2eb0232697e00 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:04:02 -0700 Subject: [PATCH] (9/25) update_partition() new helper - update_partition(disk, partition_number); does the right thing wrt devfs and driverfs (un)registration of partition entries. BLKPG ioctls fixed - now they call that beast rather than calling only devfs side. New helper - rescan_partitions(disk, bdev); does all work with wiping/rereading/etc. and fs/block_dev.c now uses it instead of check_partition(). The latter became static. --- drivers/block/blkpg.c | 4 +- drivers/block/genhd.c | 19 +------ fs/block_dev.c | 48 +++------------- fs/partitions/check.c | 150 ++++++++++++++++++++++++++++++++----------------- include/linux/blkdev.h | 2 - include/linux/genhd.h | 5 +- 6 files changed, 115 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index 106df6b4644c..9fbfaafbea25 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -107,7 +107,7 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p) /* all seems OK */ g->part[p->pno].start_sect = pstart; g->part[p->pno].nr_sects = plength; - devfs_register_partitions (g, 0); + update_partition(g, p->pno); return 0; } @@ -156,7 +156,7 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p) g->part[p->pno].start_sect = 0; g->part[p->pno].nr_sects = 0; - devfs_register_partitions (g, 0); + update_partition(g, p->pno); bd_release(bdevp); bdput(bdevp); diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index d9e3c2052b21..589ae73d7366 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -67,22 +67,11 @@ out: } EXPORT_SYMBOL(add_gendisk); +EXPORT_SYMBOL(del_gendisk); - -/** - * del_gendisk - remove partitioning information from kernel list - * @gp: per-device 
partitioning information - * - * This function unregisters the partitioning information in @gp - * with the kernel. - */ -void -del_gendisk(struct gendisk *disk) +void unlink_gendisk(struct gendisk *disk) { struct gendisk **p; - - driverfs_remove_partitions(disk); - wipe_partitions(disk); write_lock(&gendisk_lock); for (p = &gendisk_head; *p; p = &((*p)->next)) if (*p == disk) @@ -90,12 +79,8 @@ del_gendisk(struct gendisk *disk) if (*p) *p = (*p)->next; write_unlock(&gendisk_lock); - devfs_register_partitions(disk, 1); } -EXPORT_SYMBOL(del_gendisk); - - /** * get_gendisk - get partitioning information for a given device * @dev: device to get partitioning information for diff --git a/fs/block_dev.c b/fs/block_dev.c index 637c6f0f387d..30e46a931b2f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -527,22 +527,13 @@ int check_disk_change(struct block_device *bdev) int full_check_disk_change(struct block_device *bdev) { - int res; + int res = 0; if (bdev->bd_contains != bdev) BUG(); down(&bdev->bd_sem); - res = check_disk_change(bdev); - if (bdev->bd_invalidated && !bdev->bd_part_count) { - struct gendisk *disk = get_gendisk(to_kdev_t(bdev->bd_dev)); - int p; - bdev->bd_invalidated = 0; - for (p = 1; p < 1<minor_shift; p++) { - disk->part[p].start_sect = 0; - disk->part[p].nr_sects = 0; - } - res = invalidate_device(to_kdev_t(bdev->bd_dev), 1); - if (disk->part[0].nr_sects) - check_partition(disk, bdev); + if (check_disk_change(bdev)) { + rescan_partitions(get_gendisk(to_kdev_t(bdev->bd_dev)), bdev); + res = 1; } up(&bdev->bd_sem); return res; @@ -654,17 +645,8 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * inode->i_data.backing_dev_info = bdi; bdev->bd_inode->i_data.backing_dev_info = bdi; } - if (bdev->bd_invalidated && !bdev->bd_part_count) { - int p; - bdev->bd_invalidated = 0; - for (p = 1; p < 1<minor_shift; p++) { - g->part[p].start_sect = 0; - g->part[p].nr_sects = 0; - } - invalidate_device(dev, 1); - if 
(g->part[0].nr_sects) - check_partition(g, bdev); - } + if (bdev->bd_invalidated) + rescan_partitions(g, bdev); } else { down(&bdev->bd_contains->bd_sem); bdev->bd_contains->bd_part_count++; @@ -799,7 +781,7 @@ static int blkdev_reread_part(struct block_device *bdev) { kdev_t dev = to_kdev_t(bdev->bd_dev); struct gendisk *disk = get_gendisk(dev); - int p, res; + int res = 0; if (!disk || !disk->minor_shift || bdev != bdev->bd_contains) return -EINVAL; @@ -807,21 +789,7 @@ static int blkdev_reread_part(struct block_device *bdev) return -EACCES; if (down_trylock(&bdev->bd_sem)) return -EBUSY; - if (bdev->bd_part_count) { - up(&bdev->bd_sem); - return -EBUSY; - } - for (p = 1; p < 1 << disk->minor_shift; p++) { - disk->part[p].start_sect = 0; - disk->part[p].nr_sects = 0; - } - res = invalidate_device(dev, 1); - if (!res) { - if (bdev->bd_op->revalidate) - bdev->bd_op->revalidate(dev); - if (disk->part[0].nr_sects) - check_partition(disk, bdev); - } + res = rescan_partitions(disk, bdev); up(&bdev->bd_sem); return res; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0401913788df..8927d25ef60d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -149,29 +149,29 @@ static void driverfs_create_partitions(struct gendisk *hd) sprintf(dev->name, "%sdisc", name); sprintf(dev->bus_id, "%sdisc", bus_id); for (part=1; part < max_p; part++) { + dev = &p[part].hd_driverfs_dev; + sprintf(dev->name, "%spart%d", name, part); + sprintf(dev->bus_id, "%s:p%d", bus_id, part); if (!p[part].nr_sects) continue; - dev = &p[part].hd_driverfs_dev; dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor+part); - sprintf(dev->name, "%spart%d", name, part); - sprintf(dev->bus_id, "%s:p%d", bus_id, part); } for (part=0; part < max_p; part++) { dev = &p[part].hd_driverfs_dev; - if (!dev->driver_data) - continue; dev->parent = parent; if (parent) dev->bus = parent->bus; + if (!dev->driver_data) + continue; device_register(dev); device_create_file(dev, 
&dev_attr_type); device_create_file(dev, &dev_attr_kdev); } } -void driverfs_remove_partitions(struct gendisk *hd) +static void driverfs_remove_partitions(struct gendisk *hd) { int max_p = 1<minor_shift; struct hd_struct *p; @@ -188,10 +188,7 @@ void driverfs_remove_partitions(struct gendisk *hd) } } -/* - * DON'T EXPORT - */ -void check_partition(struct gendisk *hd, struct block_device *bdev) +static void check_partition(struct gendisk *hd, struct block_device *bdev) { devfs_handle_t de = NULL; dev_t dev = bdev->bd_dev; @@ -226,7 +223,7 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) if (res < 0) { if (warn_no_part) printk(" unable to read partition table\n"); - goto out; + return; } p = hd->part; for (j = 1; j < state->limit; j++) { @@ -238,18 +235,14 @@ void check_partition(struct gendisk *hd, struct block_device *bdev) md_autodetect_dev(dev+j); #endif } - goto out; + return; } - printk(" unknown partition table\n"); -out: - driverfs_create_partitions(hd); - devfs_register_partitions(hd, 0); } -#ifdef CONFIG_DEVFS_FS static void devfs_register_partition(struct gendisk *dev, int part) { +#ifdef CONFIG_DEVFS_FS devfs_handle_t dir; unsigned int devfs_flags = DEVFS_FL_DEFAULT; struct hd_struct *p = dev->part; @@ -262,26 +255,29 @@ static void devfs_register_partition(struct gendisk *dev, int part) return; if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; - sprintf (devname, "part%d", part); + sprintf(devname, "part%d", part); p[part].de = devfs_register (dir, devname, devfs_flags, dev->major, dev->first_minor + part, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); +#endif } +#ifdef CONFIG_DEVFS_FS static struct unique_numspace disc_numspace = UNIQUE_NUMBERSPACE_INITIALISER; +#endif -static void devfs_register_disc(struct gendisk *dev) +static void devfs_create_partitions(struct gendisk *dev) { +#ifdef CONFIG_DEVFS_FS int pos = 0; devfs_handle_t dir, slave; unsigned int devfs_flags = DEVFS_FL_DEFAULT; char dirname[64], 
symlink[16]; static devfs_handle_t devfs_handle; + int part, max_p = 1<minor_shift; struct hd_struct *p = dev->part; - if (p[0].de) - return; if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; if (dev->flags & GENHD_FL_DEVFS) { @@ -304,38 +300,28 @@ static void devfs_register_disc(struct gendisk *dev) sprintf(symlink, "disc%d", dev->number); devfs_mk_symlink (devfs_handle, symlink, DEVFS_FL_DEFAULT, dirname + pos, &slave, NULL); - p[0].de = devfs_register (dir, "disc", devfs_flags, + p->de = devfs_register(dir, "disc", devfs_flags, dev->major, dev->first_minor, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); - devfs_auto_unregister(p[0].de, slave); + devfs_auto_unregister(p->de, slave); if (!(dev->flags & GENHD_FL_DEVFS)) devfs_auto_unregister (slave, dir); + for (part = 1, p++; part < max_p; part++, p++) + if (p->nr_sects) + devfs_register_partition(dev, part); +#endif } -#endif /* CONFIG_DEVFS_FS */ -void devfs_register_partitions (struct gendisk *dev, int unregister) +static void devfs_remove_partitions(struct gendisk *dev) { #ifdef CONFIG_DEVFS_FS - int part, max_p; - struct hd_struct *p = dev->part; - - if (!unregister) - devfs_register_disc(dev); - max_p = (1 << dev->minor_shift); - for (part = 1; part < max_p; part++) { - if ( unregister || (p[part].nr_sects < 1) ) { - devfs_unregister(p[part].de); - p[part].de = NULL; - continue; - } - devfs_register_partition(dev, part); - } - if (unregister) { - devfs_unregister(p[0].de); - p[0].de = NULL; - devfs_dealloc_unique_number(&disc_numspace, dev->number); + int part; + for (part = 1<minor_shift; part--; ) { + devfs_unregister(dev->part[part].de); + dev->part[part].de = NULL; } -#endif /* CONFIG_DEVFS_FS */ + devfs_dealloc_unique_number(&disc_numspace, dev->number); +#endif } /* @@ -348,20 +334,20 @@ void devfs_register_partitions (struct gendisk *dev, int unregister) * done */ -void register_disk(struct gendisk *g, kdev_t dev, unsigned minors, +void register_disk(struct gendisk *disk, kdev_t 
dev, unsigned minors, struct block_device_operations *ops, long size) { struct block_device *bdev; struct hd_struct *p; - if (!g) + if (!disk) return; - p = g->part; + p = disk->part; p[0].nr_sects = size; /* No minors to use for partitions */ - if (!g->minor_shift) + if (!disk->minor_shift) return; /* No such device (e.g., media were just removed) */ @@ -371,10 +357,66 @@ void register_disk(struct gendisk *g, kdev_t dev, unsigned minors, bdev = bdget(kdev_t_to_nr(dev)); if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0) return; - check_partition(g, bdev); + check_partition(disk, bdev); + driverfs_create_partitions(disk); + devfs_create_partitions(disk); blkdev_put(bdev, BDEV_RAW); } +void update_partition(struct gendisk *disk, int part) +{ + struct hd_struct *p = disk->part + part; + struct device *dev = &p->hd_driverfs_dev; + + if (!p->nr_sects) { + if (p->de) { + devfs_unregister(p->de); + p->de = NULL; + } + if (dev->driver_data) { + device_remove_file(dev, &dev_attr_type); + device_remove_file(dev, &dev_attr_kdev); + put_device(dev); + dev->driver_data = NULL; + } + return; + } + if (!p->de) + devfs_register_partition(disk, part); + if (dev->driver_data || !(disk->flags & GENHD_FL_DRIVERFS)) + return; + dev->driver_data = + (void *)(long)__mkdev(disk->major, disk->first_minor+part); + device_register(dev); + device_create_file(dev, &dev_attr_type); + device_create_file(dev, &dev_attr_kdev); +} + +int rescan_partitions(struct gendisk *disk, struct block_device *bdev) +{ + kdev_t dev = to_kdev_t(bdev->bd_dev); + int p, res; + if (!bdev->bd_invalidated) + return 0; + if (bdev->bd_part_count) + return -EBUSY; + res = invalidate_device(dev, 1); + if (res) + return res; + bdev->bd_invalidated = 0; + for (p = 1; p < (1<minor_shift); p++) { + disk->part[p].start_sect = 0; + disk->part[p].nr_sects = 0; + } + if (bdev->bd_op->revalidate) + bdev->bd_op->revalidate(dev); + if (disk->part[0].nr_sects) + check_partition(disk, bdev); + for (p = 1; p < (1<minor_shift); p++) + 
update_partition(disk, p); + return res; +} + unsigned char *read_dev_sector(struct block_device *bdev, unsigned long n, Sector *p) { struct address_space *mapping = bdev->bd_inode->i_mapping; @@ -398,7 +440,7 @@ fail: return NULL; } -int wipe_partitions(struct gendisk *disk) +static int wipe_partitions(struct gendisk *disk) { int max_p = 1 << disk->minor_shift; int p; @@ -419,3 +461,11 @@ int wipe_partitions(struct gendisk *disk) } return 0; } + +void del_gendisk(struct gendisk *disk) +{ + driverfs_remove_partitions(disk); + wipe_partitions(disk); + unlink_gendisk(disk); + devfs_remove_partitions(disk); +} diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 77fec95ff4b4..a11b6181c76f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -281,9 +281,7 @@ struct sec_size { extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern int wipe_partitions(struct gendisk *disk); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void check_partition(struct gendisk *disk, struct block_device *bdev); extern void generic_make_request(struct bio *bio); extern inline request_queue_t *bdev_get_queue(struct block_device *bdev); extern void blk_put_request(struct request *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ee2d41d8bd4d..6474393eff5e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -89,6 +89,7 @@ struct gendisk { /* drivers/block/genhd.c */ extern void add_gendisk(struct gendisk *gp); extern void del_gendisk(struct gendisk *gp); +extern void unlink_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(kdev_t dev); static inline unsigned long get_start_sect(struct block_device *bdev) { @@ -244,8 +245,8 @@ struct unixware_disklabel { char *disk_name (struct gendisk *hd, int part, char *buf); -extern void devfs_register_partitions (struct gendisk *dev, int unregister); 
-extern void driverfs_remove_partitions (struct gendisk *hd); +extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); +extern void update_partition(struct gendisk *disk, int part); static inline unsigned int disk_index (kdev_t dev) { -- cgit v1.2.3 From 07586b3328bc910bf2cc4c25f7eb2dea7555a69c Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:05:04 -0700 Subject: [PATCH] (23/25) move pointer to gendisk from hwif to drive ide switched from hwif->gd[i] to hwif->drive[i]->disk - IOW, instead of array of two pointers to gendisks refered from hwif, we keep these pointers in relevant drives. Cleaned up. --- drivers/ide/ide-cd.c | 23 +++++++++-------------- drivers/ide/ide-disk.c | 8 ++------ drivers/ide/ide-floppy.c | 8 ++------ drivers/ide/ide-probe.c | 2 +- drivers/ide/ide.c | 9 +++------ include/linux/ide.h | 2 +- 6 files changed, 18 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 216ec47a2b32..81ca240c6be5 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2593,9 +2593,8 @@ static int ide_cdrom_register (ide_drive_t *drive, int nslots) { struct cdrom_info *info = drive->driver_data; struct cdrom_device_info *devinfo = &info->devinfo; - int minor = (drive->select.b.unit) << PARTN_BITS; - devinfo->dev = mk_kdev(HWIF(drive)->major, minor); + devinfo->dev = mk_kdev(drive->disk->major, drive->disk->first_minor); devinfo->ops = &ide_cdrom_dops; devinfo->mask = 0; *(int *)&devinfo->speed = CDROM_STATE_FLAGS (drive)->current_speed; @@ -2622,7 +2621,8 @@ static int ide_cdrom_register (ide_drive_t *drive, int nslots) devinfo->mask |= CDC_CLOSE_TRAY; devinfo->de = devfs_register(drive->de, "cd", DEVFS_FL_DEFAULT, - HWIF(drive)->major, minor, + drive->disk->major, + drive->disk->first_minor, S_IFBLK | S_IRUGO | S_IWUGO, ide_fops, NULL); @@ -2823,13 +2823,12 @@ int ide_cdrom_setup (ide_drive_t *drive) { struct cdrom_info *info = drive->driver_data; 
struct cdrom_device_info *cdi = &info->devinfo; - int minor = drive->select.b.unit << PARTN_BITS; int nslots; /* * default to read-only always and fix latter at the bottom */ - set_device_ro(mk_kdev(HWIF(drive)->major, minor), 1); + set_device_ro(mk_kdev(drive->disk->major, drive->disk->first_minor), 1); blk_queue_hardsect_size(&drive->queue, CD_FRAMESIZE); blk_queue_prep_rq(&drive->queue, ll_10byte_cmd_build); @@ -2951,7 +2950,7 @@ int ide_cdrom_setup (ide_drive_t *drive) nslots = ide_cdrom_probe_capabilities (drive); if (CDROM_CONFIG_FLAGS(drive)->dvd_ram) - set_device_ro(mk_kdev(HWIF(drive)->major, minor), 0); + set_device_ro(mk_kdev(drive->disk->major, drive->disk->first_minor), 0); if (ide_cdrom_register (drive, nslots)) { printk ("%s: ide_cdrom_setup failed to register device with the cdrom driver.\n", drive->name); @@ -2998,8 +2997,8 @@ void ide_cdrom_release (struct inode *inode, struct file *file, static int ide_cdrom_check_media_change (ide_drive_t *drive) { - return cdrom_media_changed(mk_kdev(HWIF (drive)->major, - (drive->select.b.unit) << PARTN_BITS)); + return cdrom_media_changed(mk_kdev(drive->disk->major, + drive->disk->first_minor)); } static @@ -3025,9 +3024,7 @@ int ide_cdrom_cleanup(ide_drive_t *drive) { struct cdrom_info *info = drive->driver_data; struct cdrom_device_info *devinfo = &info->devinfo; - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; if (ide_unregister_subdriver (drive)) return 1; @@ -3092,9 +3089,7 @@ MODULE_DESCRIPTION("ATAPI CD-ROM Driver"); static int ide_cdrom_reinit (ide_drive_t *drive) { struct cdrom_info *info; - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; struct request_sense sense; if (!strstr("ide-cdrom", drive->driver_req)) diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 144c0bb95155..e8040b7626f7 100644 --- 
a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1618,9 +1618,7 @@ static void idedisk_setup (ide_drive_t *drive) static int idedisk_cleanup (ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) if (do_idedisk_flushcache(drive)) printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", @@ -1672,9 +1670,7 @@ MODULE_DESCRIPTION("ATA DISK Driver"); static int idedisk_reinit(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; /* strstr("foo", "") is non-NULL */ if (!strstr("ide-disk", drive->driver_req)) diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 5e2ca16310fa..c20115bc35f1 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -2112,9 +2112,7 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) static int idefloppy_cleanup (ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; if (ide_unregister_subdriver (drive)) return 1; @@ -2181,9 +2179,7 @@ static ide_driver_t idefloppy_driver = { static int idefloppy_reinit (ide_drive_t *drive) { idefloppy_floppy_t *floppy; - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; + struct gendisk *g = drive->disk; if (!strstr("ide-floppy", drive->driver_req)) goto failed; if (!drive->present) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 50e98a9e6066..47336c9d408e 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -848,7 +848,7 @@ static void init_gendisk (ide_hwif_t *hwif) gd[unit].major_name = names + 4*unit; gd[unit].minor_shift 
= PARTN_BITS; gd[unit].fops = ide_fops; - hwif->gd[unit] = gd + unit; + hwif->drives[unit].disk = gd + unit; } for (unit = 0; unit < units; ++unit) { diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 08950237c5d6..52472f955262 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1748,10 +1748,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio void ide_revalidate_drive (ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); - int unit = drive - hwif->drives; - struct gendisk *g = hwif->gd[unit]; - g->part[0].nr_sects = current_capacity(drive); + drive->disk->part[0].nr_sects = current_capacity(drive); } /* @@ -2058,13 +2055,13 @@ void ide_unregister (unsigned int index) blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; blk_clear(hwif->major); - gd = hwif->gd[0]; + gd = hwif->drives[0].disk; if (gd) { int i; kfree(gd->part); kfree(gd); for (i = 0; i < MAX_DRIVES; i++) - hwif->gd[i] = NULL; + hwif->drives[i].disk = NULL; } old_hwif = *hwif; init_hwif_data (index); /* restore hwif data to pristine status */ diff --git a/include/linux/ide.h b/include/linux/ide.h index aacdaad8f594..501b97902799 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -530,6 +530,7 @@ typedef struct ide_drive_s { unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ struct list_head list; + struct gendisk *disk; } ide_drive_t; /* @@ -716,7 +717,6 @@ typedef struct hwif_s { */ hw_regs_t hw; /* Hardware info */ ide_drive_t drives[MAX_DRIVES]; /* drive info */ - struct gendisk *gd[MAX_DRIVES];/* gendisk structure */ int addressing; /* hosts addressing */ void (*tuneproc)(ide_drive_t *, byte); /* routine to tune PIO mode for drives */ int (*speedproc)(ide_drive_t *, byte); /* routine to retune DMA modes for drives */ -- cgit v1.2.3 From 3708de94cf8d66e46d5ab7d712da81684ff99bf7 Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:05:09 -0700 
Subject: [PATCH] (24/25) disk capacity helpers new helpers - get_capacity(gendisk)/set_capacity(gendisk, sectors). Drivers switched to these; that eliminates most of the accesses to disk->part[]... in the drivers (and makes code more readable, while we are at it). That had caught several bugs when minor had been used in place of minor>>minor_shift (acsi.c is especially nasty in that respect; I don't know if it had ever been used with multiple devices...) --- drivers/acorn/block/mfmhd.c | 67 ++++++++++++++++++----------------------- drivers/block/DAC960.c | 7 ++--- drivers/block/acsi.c | 26 +++++++--------- drivers/block/cciss.c | 5 ++- drivers/block/cpqarray.c | 2 +- drivers/block/paride/pd.c | 6 ++-- drivers/block/ps2esdi.c | 14 +++++---- drivers/block/umem.c | 2 +- drivers/block/xd.c | 8 +++-- drivers/ide/hd.c | 18 +++++------ drivers/ide/ide-cd.c | 4 +-- drivers/ide/ide-floppy.c | 6 ++-- drivers/ide/ide-geometry.c | 2 +- drivers/ide/ide.c | 10 +++--- drivers/md/md.c | 4 +-- drivers/message/i2o/i2o_block.c | 24 +++++++-------- drivers/mtd/ftl.c | 6 ++-- drivers/mtd/nftlcore.c | 4 +-- drivers/scsi/sd.c | 8 ++--- drivers/scsi/sr.c | 6 ++-- fs/partitions/check.c | 6 ++-- include/linux/genhd.h | 8 +++++ 22 files changed, 119 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c index 38f7dfc9e82e..8989984b9bda 100644 --- a/drivers/acorn/block/mfmhd.c +++ b/drivers/acorn/block/mfmhd.c @@ -882,6 +882,22 @@ static void mfm_rerequest(void) mfm_request(); } +static struct gendisk mfm_gendisk[2] = { +{ + .major = MAJOR_NR, + .first_minor = 0, + .major_name = "mfma", + .minor_shift = 6, + .part = mfm, +}, +{ + .major = MAJOR_NR, + .first_minor = 64, + .major_name = "mfmb", + .minor_shift = 6, + .part = mfm + 64, +}; + static void mfm_request(void) { DBG("mfm_request CURRENT=%p Busy=%d\n", CURRENT, Busy); @@ -895,7 +911,7 @@ static void mfm_request(void) Busy = 1; while (1) { - unsigned int dev, 
block, nsect; + unsigned int dev, block, nsect, unit; DBG("mfm_request: loop start\n"); sti(); @@ -912,26 +928,26 @@ static void mfm_request(void) DBG("mfm_request: before arg extraction\n"); dev = minor(CURRENT->rq_dev); + unit = dev>>6; block = CURRENT->sector; nsect = CURRENT->nr_sectors; #ifdef DEBUG - /*if ((dev>>6)==1) */ console_printf("mfm_request: raw vals: dev=%d (block=512 bytes) block=%d nblocks=%d\n", dev, block, nsect); + /*if (unit==1) */ console_printf("mfm_request: raw vals: dev=%d (block=512 bytes) block=%d nblocks=%d\n", dev, block, nsect); #endif - if (dev >= (mfm_drives << 6) || - block >= mfm[dev].nr_sects || ((block+nsect) > mfm[dev].nr_sects)) { - if (dev >= (mfm_drives << 6)) + if (unit >= mfm_drives || + block >= get_capacity(mfm_gendisk + unit) || + ((block+nsect) > get_capacity(mfm_gendisk + unit))) { + if (unit >= mfm_drives) printk("mfm: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else - printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", (dev >> 6)+'a', - block, nsect, mfm[dev].nr_sects); + printk("mfm%c: bad access: block=%d, count=%d, nr_sects=%ld\n", unit+'a', + block, nsect, get_capacity(mfm_gendisk+unit)); printk("mfm: continue 1\n"); end_request(CURRENT, 0); Busy = 0; continue; } - block += mfm[dev].start_sect; - /* DAG: Linux doesn't cope with this - even though it has an array telling it the hardware block size - silly */ block <<= 1; /* Now in 256 byte sectors */ @@ -1163,18 +1179,9 @@ static int mfm_initdrives(void) static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) { struct hd_geometry *geo = (struct hd_geometry *) arg; - kdev_t dev; - int device, minor, err; - - if (!inode || !(dev = inode->i_rdev)) - return -EINVAL; - - minor = minor(dev); - - device = DEVICE_NR(minor(inode->i_rdev)), err; + int device = DEVICE_NR(minor(inode->i_rdev)); if (device >= mfm_drives) return -EINVAL; - if (cmd != HDIO_GETGEO) return -EINVAL; if (!arg) @@ -1185,7 +1192,8 @@ static int 
mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long a return -EFAULT; if (put_user (mfm_info[device].cylinders, &geo->cylinders)) return -EFAULT; - if (put_user (mfm[minor].start_sect, &geo->start)) + start = get_start_sect(inode->i_bdev); + if (put_user (get_start_sect(inode->i_bdev), &geo->start)) return -EFAULT; return 0; } @@ -1237,27 +1245,10 @@ void xd_set_geometry(struct block_device *bdev, unsigned char secsptrack, if (raw_cmd.dev == drive) mfm_specify (); mfm_geometry (drive); - mfm[drive << 6].start_sect = 0; - mfm[drive << 6].nr_sects = mfm_info[drive].cylinders * mfm_info[drive].heads * mfm_info[drive].sectors / 2; + set_capacity(&mfm_gendisk[drive], mfm_info[drive].cylinders * mfm_info[drive].heads * mfm_info[drive].sectors / 2); } } -static struct gendisk mfm_gendisk[2] = { -{ - .major = MAJOR_NR, - .first_minor = 0, - .major_name = "mfma", - .minor_shift = 6, - .part = mfm, -}, -{ - .major = MAJOR_NR, - .first_minor = 64, - .major_name = "mfmb", - .minor_shift = 6, - .part = mfm + 64, -}; - static struct block_device_operations mfm_fops = { .owner = THIS_MODULE, diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 72d50aa7b8f2..58415a08c41b 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -2033,10 +2033,9 @@ static long disk_size(DAC960_Controller_T *Controller, int disk) static void DAC960_ComputeGenericDiskInfo(DAC960_Controller_T *Controller) { - struct gendisk *disks = Controller->disks; int disk; for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) - disks->part[0].nr_sects = disk_size(Controller, disk); + set_capacity(Controller->disks + disk, disk_size(Controller, disk)); } static int DAC960_revalidate(kdev_t dev) @@ -2044,7 +2043,7 @@ static int DAC960_revalidate(kdev_t dev) int ctlr = DAC960_ControllerNumber(dev); int disk = DAC960_LogicalDriveNumber(dev); DAC960_Controller_T *p = DAC960_Controllers[ctlr]; - p->disks[disk].part[0].nr_sects = disk_size(p, disk); + 
set_capacity(&p->disks[disk], disk_size(p, disk)); return 0; } @@ -5276,7 +5275,7 @@ static int DAC960_Open(Inode_T *Inode, File_T *File) DAC960_KernelDevice(Controller->ControllerNumber, LogicalDriveNumber, 0), DAC960_MaxPartitions, &DAC960_BlockDeviceOperations, size); } - if (Controller->disks[LogicalDriveNumber].part[0].nr_sects == 0) + if (!get_capacity(&Controller->disks[LogicalDriveNumber])) return -ENXIO; /* Increment Controller and Logical Drive Usage Counts. diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index e333d4d375c7..1197c8b3bb86 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -783,7 +783,7 @@ static void read_intr( void ) status = acsi_getstatus(); if (status != 0) { - int dev = minor(CURRENT->rq_dev); + int dev = DEVICE_NR(CURRENT->rq_dev); printk( KERN_ERR "ad%c: ", dev+'a' ); if (!acsi_reqsense( acsi_buffer, acsi_info[dev].target, acsi_info[dev].lun)) @@ -814,7 +814,7 @@ static void write_intr(void) status = acsi_getstatus(); if (status != 0) { - int dev = minor(CURRENT->rq_dev); + int dev = DEVICE_NR(CURRENT->rq_dev); printk( KERN_ERR "ad%c: ", dev+'a' ); if (!acsi_reqsense( acsi_buffer, acsi_info[dev].target, acsi_info[dev].lun)) @@ -973,15 +973,15 @@ static void redo_acsi_request( void ) panic(DEVICE_NAME ": block not locked"); } - dev = minor(CURRENT->rq_dev); + dev = DEVICE_NR(CURRENT->rq_dev); block = CURRENT->sector; if (dev >= NDevices || - block+CURRENT->nr_sectors >= acsi_part[dev].nr_sects) { + block+CURRENT->nr_sectors >= get_capacity(acsi_gendisk + dev)) { #ifdef DEBUG printk( "ad%c: attempted access for blocks %d...%ld past end of device at block %ld.\n", dev+'a', block, block + CURRENT->nr_sectors - 1, - acsi_part[dev].nr_sects); + get_capacity(acsi_gendisk + dev)); #endif end_request(CURRENT, 0); goto repeat; @@ -1088,11 +1088,7 @@ static void redo_acsi_request( void ) static int acsi_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg ) { - dev_t dev; - - if (!inode) - 
return -EINVAL; - dev = minor(inode->i_rdev); + int dev = DEVICE_NR(inode->i_rdev); if (dev >= NDevices) return -EINVAL; switch (cmd) { @@ -1140,7 +1136,7 @@ static int acsi_open( struct inode * inode, struct file * filp ) int device; struct acsi_info_struct *aip; - device = minor(inode->i_rdev); + device = DEVICE_NR(inode->i_rdev); if (device >= NDevices) return -ENXIO; aip = &acsi_info[device]; @@ -1176,7 +1172,7 @@ static int acsi_open( struct inode * inode, struct file * filp ) static int acsi_release( struct inode * inode, struct file * file ) { - int device = minor(inode->i_rdev); + int device = DEVICE_NR(inode->i_rdev); if (--access_count[device] == 0 && acsi_info[device].removable) acsi_prevent_removal(device, 0); return( 0 ); @@ -1204,7 +1200,7 @@ static void acsi_prevent_removal(int device, int flag) static int acsi_media_change (kdev_t dev) { - int device = minor(dev); + int device = DEVICE_NR(dev); struct acsi_info_struct *aip; aip = &acsi_info[device]; @@ -1807,7 +1803,7 @@ void cleanup_module(void) static int acsi_revalidate(kdev_t dev) { - int unit = DEVICE_NR(minor(dev)); + int unit = DEVICE_NR(dev); struct acsi_info_struct *aip = &acsi_info[unit]; stdma_lock( NULL, NULL ); if (acsi_devinit(aip) != DEV_SUPPORTED) { @@ -1821,6 +1817,6 @@ static int acsi_revalidate(kdev_t dev) ENABLE_IRQ(); stdma_release(); - acsi_part[minor(dev)].nr_sects = aip->size; + set_capacity(acsi_gendisk + unit, aip->size); return 0; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 73f5888010d2..969be47e8711 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -407,8 +407,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, driver_geo.sectors = 0x3f; driver_geo.cylinders = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } - driver_geo.start= - hba[ctlr]->hd[minor(inode->i_rdev)].start_sect; + driver_geo.start= get_start_sect(inode->i_bdev); if (copy_to_user((void *) arg, &driver_geo, sizeof( struct hd_geometry))) return -EFAULT; @@ 
-705,7 +704,7 @@ static int cciss_revalidate(kdev_t dev) int ctlr = major(dev) - MAJOR_NR; int target = minor(dev) >> NWD_SHIFT; struct gendisk *disk = &hba[ctlr]->gendisk[target]; - disk->part[0].nr_sects = hba[ctlr]->drv[target].nr_blocks; + set_capacity(disk, hba[ctlr]->drv[target].nr_blocks); return 0; } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 658f201efd0f..2d6ff7620e46 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1497,7 +1497,7 @@ static int ida_revalidate(kdev_t dev) int ctlr = major(dev) - MAJOR_NR; int target = DEVICE_NR(dev); struct gendisk *gdev = &ida_gendisk[ctlr*NWD+target]; - gdev->part[minor(dev)].nr_sects = hba[ctlr]->drv[target].nr_blks; + set_capacity(gdev, hba[ctlr]->drv[target].nr_blks); return 0; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index b100a1faf6b8..bb9c23e6b364 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -438,9 +438,9 @@ static int pd_revalidate(kdev_t dev) if ((unit >= PD_UNITS) || !PD.present) return -ENODEV; if (pd_identify(unit)) - pd_hd[minor(dev)].nr_sects = PD.capacity; + set_capacity(&PD.gd, PD.capacity); else - pd_hd[minor(dev)].nr_sects = 0; + set_capacity(&PD.gd, 0); return 0; } @@ -727,7 +727,7 @@ repeat: pd_count = CURRENT->current_nr_sectors; if ((pd_dev >= PD_DEVS) || - ((pd_block+pd_count) > pd_hd[pd_dev].nr_sects)) { + ((pd_block+pd_count) > get_capacity(&pd[unit].gd))) { end_request(CURRENT, 0); goto repeat; } diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index 0882dd30d940..0afce2e2ab8f 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -487,6 +487,7 @@ static void __init ps2esdi_get_device_cfg(void) static void do_ps2esdi_request(request_queue_t * q) { u_int block, count; + int unit; /* since, this routine is called with interrupts cleared - they must be before it finishes */ @@ -503,18 +504,19 @@ static void do_ps2esdi_request(request_queue_t * q) if 
(blk_queue_empty(QUEUE)) return; + unit = DEVICE_NR(CURRENT->rq_dev); if (isa_virt_to_bus(CURRENT->buffer + CURRENT->current_nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); end_request(CURRENT, FAIL); } /* check for above 16Mb dmas */ - else if ((DEVICE_NR(CURRENT->rq_dev) < ps2esdi_drives) && + else if ((unit < ps2esdi_drives) && (CURRENT->sector + CURRENT->current_nr_sectors <= - ps2esdi[minor(CURRENT->rq_dev)].nr_sects) && + get_capacity(&ps2esdi_gendisk[unit])) && CURRENT->flags & REQ_CMD) { #if 0 printk("%s:got request. device : %d minor : %d command : %d sector : %ld count : %ld\n", DEVICE_NAME, - DEVICE_NR(CURRENT->rq_dev), minor(CURRENT->rq_dev), + unit, minor(CURRENT->rq_dev), CURRENT->cmd, CURRENT->sector, CURRENT->current_nr_sectors); #endif @@ -524,10 +526,10 @@ static void do_ps2esdi_request(request_queue_t * q) switch (rq_data_dir(CURRENT)) { case READ: - ps2esdi_readwrite(READ, DEVICE_NR(CURRENT->rq_dev), block, count); + ps2esdi_readwrite(READ, unit, block, count); break; case WRITE: - ps2esdi_readwrite(WRITE, DEVICE_NR(CURRENT->rq_dev), block, count); + ps2esdi_readwrite(WRITE, unit, block, count); break; default: printk("%s: Unknown command\n", DEVICE_NAME); @@ -538,7 +540,7 @@ static void do_ps2esdi_request(request_queue_t * q) /* is request is valid */ else { printk("Grrr. error. 
ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, - CURRENT->sector, ps2esdi[minor(CURRENT->rq_dev)].nr_sects); + CURRENT->sector, get_capacity(&ps2esdi_gendisk[unit])); end_request(CURRENT, FAIL); } diff --git a/drivers/block/umem.c b/drivers/block/umem.c index c1c872a6e358..8e71ac89aa8c 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -812,7 +812,7 @@ static void del_battery_timer(void) static int mm_revalidate(kdev_t i_rdev) { int card_number = DEVICE_NR(i_rdev); - mm_partitions[minor(i_rdev)].nr_sects = cards[card_number].mm_size << 1; + set_capacity(mm_gendisk + card_number, cards[card_number].mm_size << 1); return 0; } /* diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 7f30d2df7506..9d6bf0ada74d 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -279,15 +279,17 @@ static void do_xd_request (request_queue_t * q) return; while (1) { + int unit; code = 0; /* do some checking on the request structure */ if (blk_queue_empty(QUEUE)) return; - if (DEVICE_NR(CURRENT->rq_dev) < xd_drives + unit = DEVICE_NR(CURRENT->rq_dev); + if (unit < xd_drives && (CURRENT->flags & REQ_CMD) && CURRENT->sector + CURRENT->nr_sectors - <= xd_struct[minor(CURRENT->rq_dev)].nr_sects) { + <= get_capacity(xd_gendisk + unit)) { block = CURRENT->sector; count = CURRENT->nr_sectors; @@ -295,7 +297,7 @@ static void do_xd_request (request_queue_t * q) case READ: case WRITE: for (retry = 0; (retry < XD_RETRIES) && !code; retry++) - code = xd_readwrite(rq_data_dir(CURRENT),DEVICE_NR(CURRENT->rq_dev), + code = xd_readwrite(rq_data_dir(CURRENT),unit, CURRENT->buffer,block,count); break; default: diff --git a/drivers/ide/hd.c b/drivers/ide/hd.c index 534983a99346..1bf3df67d0dc 100644 --- a/drivers/ide/hd.c +++ b/drivers/ide/hd.c @@ -596,22 +596,21 @@ repeat: reset_hd(); return; } - dev = minor(CURRENT->rq_dev); + dev = DEVICE_NR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || (dev & 0x3f) || - block >= 
hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { - if (dev >= (NR_HD<<6) || (dev & 0x3f)) + if (dev >= NR_HD || block >= get_capacity(hd_gendisk+dev) || + ((block+nsect) > get_capacity(hd_gendisk+unit))) { + if (dev >= NR_HD) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", - (minor(CURRENT->rq_dev)>>6)+'a', block, nsect); + dev+'a', block, nsect); end_request(CURRENT, 0); goto repeat; } - dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) goto repeat; @@ -819,10 +818,11 @@ static void __init hd_geninit(void) #endif for (drive=0 ; drive < NR_HD ; drive++) { - hd[drive<<6].nr_sects = hd_info[drive].head * + sector_t size = hd_info[drive].head * hd_info[drive].sect * hd_info[drive].cyl; - printk ("hd%c: %ldMB, CHS=%d/%d/%d\n", drive+'a', - hd[drive<<6].nr_sects / 2048, hd_info[drive].cyl, + set_capacity(hd_gendisk + drive, size); + printk ("%s: %ldMB, CHS=%d/%d/%d\n", hd_gendisk[drive].major_name, + size / 2048, hd_info[drive].cyl, hd_info[drive].head, hd_info[drive].sect); } if (!NR_HD) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 81ca240c6be5..b293f3e92833 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2138,7 +2138,7 @@ static int cdrom_read_toc(ide_drive_t *drive, struct request_sense *sense) if (stat) toc->capacity = 0x1fffff; - drive->part[0].nr_sects = toc->capacity * SECTORS_PER_FRAME; + set_capacity(drive->disk, toc->capacity * SECTORS_PER_FRAME); /* Remember that we've read this stuff. 
*/ CDROM_STATE_FLAGS (drive)->toc_valid = 1; @@ -3148,7 +3148,7 @@ static int ide_cdrom_reinit (ide_drive_t *drive) add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, - g->part[0].nr_sects); + get_capacity(g)); return 0; failed: return 1; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index c20115bc35f1..a2a25a29af58 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -1500,7 +1500,7 @@ static int idefloppy_get_capacity (ide_drive_t *drive) drive->bios_cyl = 0; drive->bios_head = drive->bios_sect = 0; floppy->blocks = floppy->bs_factor = 0; - drive->part[0].nr_sects = 0; + set_capacity(drive->disk, 0); idefloppy_create_read_capacity_cmd (&pc); if (idefloppy_queue_pc_tail (drive, &pc)) { @@ -1555,7 +1555,7 @@ static int idefloppy_get_capacity (ide_drive_t *drive) (void) idefloppy_get_flexible_disk_page (drive); } - drive->part[0].nr_sects = floppy->blocks * floppy->bs_factor; + set_capacity(drive->disk, floppy->blocks * floppy->bs_factor); return rc; } @@ -2213,7 +2213,7 @@ static int idefloppy_reinit (ide_drive_t *drive) add_gendisk(g); register_disk(g, mk_kdev(g->major,g->first_minor), 1<minor_shift, ide_fops, - g->part[0].nr_sects); + get_capacity(g)); return 0; failed: return 1; diff --git a/drivers/ide/ide-geometry.c b/drivers/ide/ide-geometry.c index 5f21651c1785..850419f2a0a7 100644 --- a/drivers/ide/ide-geometry.c +++ b/drivers/ide/ide-geometry.c @@ -214,7 +214,7 @@ int ide_xlate_1024 (kdev_t i_rdev, int xparm, int ptheads, const char *msg) ret = 1; } - drive->part[0].nr_sects = current_capacity(drive); + set_capacity(drive->disk, current_capacity(drive)); if (ret) printk("%s%s [%d/%d/%d]", msg, msg1, diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 52472f955262..c053036cc289 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1661,7 +1661,7 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; #if 0 - if 
((drive->present) && (drive->part[minor].nr_sects)) + if (drive->present && get_capacity(drive->disk)) #else if (drive->present) #endif @@ -1748,7 +1748,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio void ide_revalidate_drive (ide_drive_t *drive) { - drive->disk->part[0].nr_sects = current_capacity(drive); + set_capacity(drive->disk, current_capacity(drive)); } /* @@ -1975,7 +1975,7 @@ void ide_unregister (unsigned int index) continue; minor = drive->select.b.unit << PARTN_BITS; for (p = 0; p < (1<part[p].nr_sects > 0) { + if (get_capacity(drive->disk)) { kdev_t devp = mk_kdev(hwif->major, minor+p); invalidate_device(devp, 0); } @@ -2523,7 +2523,7 @@ static int ide_ioctl (struct inode *inode, struct file *file, if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[minor(inode->i_rdev)&PARTN_MASK].start_sect, + if (put_user((unsigned)get_start_sect(inode->i_bdev), (unsigned long *) &loc->start)) return -EFAULT; return 0; } @@ -2535,7 +2535,7 @@ static int ide_ioctl (struct inode *inode, struct file *file, if (put_user(drive->head, (byte *) &loc->heads)) return -EFAULT; if (put_user(drive->sect, (byte *) &loc->sectors)) return -EFAULT; if (put_user(drive->cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[minor(inode->i_rdev)&PARTN_MASK].start_sect, + if (put_user((unsigned)get_start_sect(inode->i_bdev), (unsigned long *) &loc->start)) return -EFAULT; return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index e50bfe391c01..b903aa9d18e2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -618,7 +618,7 @@ static void free_mddev(mddev_t *mddev) export_array(mddev); md_size[mdidx(mddev)] = 0; - md_hd_struct[mdidx(mddev)].nr_sects = 0; + set_capacity(disks[mdidx(mddev)], 0); } #undef 
BAD_CSUM @@ -2343,7 +2343,7 @@ static int md_ioctl(struct inode *inode, struct file *file, err = put_user (4, (char *) &loc->sectors); if (err) goto abort_unlock; - err = put_user (md_hd_struct[mdidx(mddev)].nr_sects/8, + err = put_user(get_capacity(disks[mdidx(mddev)])/8, (short *) &loc->cylinders); if (err) goto abort_unlock; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index a8cdfb80a95a..3928e81ada4d 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -248,7 +248,7 @@ static u32 i2ob_get(struct i2ob_device *dev) * Turn a Linux block request into an I2O block read/write. */ -static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit) +static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, int unit) { struct i2o_controller *c = dev->controller; int tid = dev->tid; @@ -279,7 +279,7 @@ static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, /* This can be optimised later - just want to be sure its right for starters */ - offset = ((u64)(req->sector+base)) << 9; + offset = ((u64)req->sector) << 9; __raw_writel( offset & 0xFFFFFFFF, msg+24); __raw_writel(offset>>32, msg+28); mptr=msg+32; @@ -724,7 +724,7 @@ static int i2ob_evt(void *dummy) register_disk(p, mk_kdev(p->major, p->first_minor), 1<minor_shift, p->fops, - i2ob[unit].nr_sects); + get_capacity(p)); break; } @@ -768,7 +768,7 @@ static int i2ob_evt(void *dummy) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); spin_lock_irqsave(I2O_LOCK(unit), flags); - i2ob[unit].nr_sects = size>>9; + set_capacity(&i2o_disk[unit>>4], size>>9); spin_unlock_irqrestore(I2O_LOCK(unit), flags); break; } @@ -869,7 +869,7 @@ static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *de i2ob_backlog_tail[c->unit] = NULL; unit = minor(ireq->req->rq_dev); - i2ob_send(m, dev, ireq, i2ob[unit].start_sect, unit); + i2ob_send(m, dev, ireq, unit); } 
if(i2ob_backlog[c->unit]) return 1; @@ -969,7 +969,7 @@ static void i2ob_request(request_queue_t *q) i2ob_queues[dev->unit]->i2ob_qhead = ireq->next; ireq->req = req; - i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0)); + i2ob_send(m, dev, ireq, (unit&0xF0)); } } @@ -1038,7 +1038,7 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct hd_geometry g; - int u = minor(inode->i_rdev) & 0xF0; + int u = minor(inode->i_rdev) >> 4; /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) @@ -1046,7 +1046,7 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, if (cmd != HDIO_GETGEO) return -EINVAL; - i2o_block_biosparam(i2ob[u].nr_sects, + i2o_block_biosparam(get_capacity(&i2o_disk[u]), &g.cylinders, &g.heads, &g.sectors); g.start = get_start_sect(inode->i_bdev); return copy_to_user((void *)arg, &g, sizeof(g)) ? -EFAULT : 0; @@ -1222,7 +1222,7 @@ static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, i i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); - i2ob[unit].nr_sects = size>>9; + set_capacity(&i2o_disk[unit>>4], size>>9); /* Set limit based on inbound frame size */ limit = (d->controller->status_block->inbound_frame_size - 8)/2; @@ -1326,7 +1326,7 @@ static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, i i2o_event_register(c, d->lct_data.tid, i2ob_context, unit, (I2OB_EVENT_MASK & d->lct_data.event_capabilities)); - i2ob[unit].nr_sects = size>>9; + set_capacity(i2o_disk[unit>>4], size>>9); return 0; } @@ -1471,7 +1471,7 @@ static void i2ob_scan(int bios) register_disk(p, mk_kdev(p->major, p->first_minor), 1<minor_shift, p->fops, - i2ob[scan_unit].nr_sects); + get_capacity(p)); scan_unit+=16; i2ob_dev_count++; @@ -1564,7 +1564,7 @@ void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d) register_disk(p, mk_kdev(p->major, p->first_minor), 1<minor_shift, 
p->fops, - i2ob[unit].nr_sects); + get_capacity(p)); i2ob_dev_count++; i2o_device_notify_on(d, &i2o_block_handler); } diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 4d50cf22fc58..9b4758d19f12 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -846,7 +846,7 @@ static int ftl_open(struct inode *inode, struct file *file) if (partition->state != FTL_FORMATTED) return -ENXIO; - if (partition->disk->part[0].nr_sects == 0) + if (get_capacity(partition->disk) == 0) return -ENXIO; if (!get_mtd_device(partition->mtd, -1)) @@ -1132,8 +1132,8 @@ static int ftl_revalidate(kdev_t dev) int unit = minor(dev) >> 4; partition_t *part = myparts[unit]; scan_header(part); - part->disk->part[0].nr_sects = - le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); + set_capacity(part->disk, + le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE)); return 0; } diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 27cd8b48ca14..26be858d720b 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -846,10 +846,10 @@ void nftl_request(RQFUNC_ARG) down(&nftl->mutex); DEBUG(MTD_DEBUG_LEVEL3, "Got mutex\n"); - if (block + nsect > part_table[dev].nr_sects) { + if (block + nsect > get_capacity(nftl->disk)) { /* access past the end of device */ printk("nftl%c%d: bad access: block = %d, count = %d\n", - (minor(req->rq_dev)>>6)+'a', dev & 0xf, block, nsect); + unit+'a', dev & 0xf, block, nsect); up(&nftl->mutex); res = 0; /* fail */ goto repeat; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 39112441f969..5bc2dd6e9517 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -286,6 +286,8 @@ static request_queue_t *sd_find_queue(kdev_t dev) return NULL; /* No such device */ } +static struct gendisk **sd_disks; + /** * sd_init_command - build a scsi (read or write) command from * information in the request structure. 
@@ -321,7 +323,7 @@ static int sd_init_command(Scsi_Cmnd * SCpnt) /* >>>>> this change is not in the lk 2.5 series */ if (part_nr >= (sd_template.dev_max << 4) || (part_nr & 0xf) || !sdp || !sdp->online || - block + SCpnt->request->nr_sectors > sd[part_nr].nr_sects) { + block + SCpnt->request->nr_sectors > get_capacity(sd_disks[dsk_nr])) { SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", SCpnt->request->nr_sectors)); SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); @@ -588,8 +590,6 @@ static struct block_device_operations sd_fops = revalidate: sd_revalidate }; -static struct gendisk **sd_disks; - /** * sd_rw_intr - bottom half handler: called when the lower level * driver has completed (successfully or otherwise) a scsi command. @@ -1374,7 +1374,7 @@ static int sd_revalidate(kdev_t dev) return -ENODEV; sd_init_onedisk(sdkp, dsk_nr); - sd_disks[dsk_nr]->part[0].nr_sects = sdkp->capacity; + set_capacity(sd_disks[dsk_nr], sdkp->capacity); return 0; } diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 2ced09b36ad5..e201e29df2dd 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -234,9 +234,9 @@ static void rw_intr(Scsi_Cmnd * SCpnt) * block. Therefore, if we hit a medium error within the last * 75 2K sectors, we decrease the saved size value. */ - if (error_sector < cd->disk->part[0].nr_sects && + if (error_sector < get_capacity(cd->disk) && cd->capacity - error_sector < 4 * 75) - cd->disk->part[0].nr_sects = error_sector; + set_capacity(cd->disk, error_sector); } /* @@ -555,7 +555,7 @@ static void get_sectorsize(Scsi_CD *cd) * what the device is capable of. 
*/ cd->needs_sector_size = 0; - cd->disk->part[0].nr_sects = cd->capacity; + set_capacity(cd->disk, cd->capacity); } queue = &cd->device->request_queue; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 8927d25ef60d..67ce47f76946 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -338,13 +338,11 @@ void register_disk(struct gendisk *disk, kdev_t dev, unsigned minors, struct block_device_operations *ops, long size) { struct block_device *bdev; - struct hd_struct *p; if (!disk) return; - p = disk->part; - p[0].nr_sects = size; + set_capacity(disk, size); /* No minors to use for partitions */ if (!disk->minor_shift) @@ -410,7 +408,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) } if (bdev->bd_op->revalidate) bdev->bd_op->revalidate(dev); - if (disk->part[0].nr_sects) + if (get_capacity(disk)) check_partition(disk, bdev); for (p = 1; p < (1<minor_shift); p++) update_partition(disk, p); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6474393eff5e..0bd31339d6f5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -95,6 +95,14 @@ static inline unsigned long get_start_sect(struct block_device *bdev) { return bdev->bd_offset; } +static inline sector_t get_capacity(struct gendisk *disk) +{ + return disk->part[0].nr_sects; +} +static inline void set_capacity(struct gendisk *disk, sector_t size) +{ + disk->part[0].nr_sects = size; +} #endif /* __KERNEL__ */ -- cgit v1.2.3 From e86a37861c8a0830d7410bf9ebe5ebaf5f6adc7f Mon Sep 17 00:00:00 2001 From: Alexander Viro Date: Sat, 7 Sep 2002 03:05:14 -0700 Subject: [PATCH] (25/25) more cleanups of struct gendisk. * we remove the partition 0 from ->part[] and put the old contents of ->part[0] into gendisk itself; indexes are shifted, obviously. * ->part is allocated at add_gendisk() time and freed at del_gendisk() according to value of ->minor_shift; static arrays of hd_struct are gone from drivers, ditto for manual allocations a-la ide.
As the matter of fact, none of the drivers know about struct hd_struct now. --- drivers/acorn/block/mfmhd.c | 4 -- drivers/block/DAC960.c | 1 - drivers/block/DAC960.h | 2 - drivers/block/acsi.c | 2 - drivers/block/blkpg.c | 14 +++--- drivers/block/cciss.c | 2 - drivers/block/cciss.h | 3 -- drivers/block/cpqarray.c | 10 +--- drivers/block/genhd.c | 26 ++++++++-- drivers/block/paride/pd.c | 3 -- drivers/block/ps2esdi.c | 3 -- drivers/block/umem.c | 4 -- drivers/block/xd.c | 4 -- drivers/ide/hd.c | 4 -- drivers/ide/ide-probe.c | 11 ---- drivers/ide/ide.c | 3 +- drivers/md/md.c | 9 +--- drivers/message/i2o/i2o_block.c | 2 - drivers/mtd/ftl.c | 3 -- drivers/mtd/nftlcore.c | 11 ---- drivers/s390/block/dasd_genhd.c | 2 - drivers/scsi/sd.c | 11 +--- drivers/scsi/sd.h | 2 - drivers/scsi/sr.c | 26 ++++------ fs/block_dev.c | 12 ++--- fs/partitions/check.c | 108 +++++++++++++++++++++++++++------------- include/linux/genhd.h | 11 ++-- include/linux/ide.h | 1 - include/linux/raid/md.h | 1 - 29 files changed, 127 insertions(+), 168 deletions(-) (limited to 'include/linux') diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c index 8989984b9bda..55df18f1a484 100644 --- a/drivers/acorn/block/mfmhd.c +++ b/drivers/acorn/block/mfmhd.c @@ -193,8 +193,6 @@ struct mfm_info { #define MFM_DRV_INFO mfm_info[raw_cmd.dev] -static struct hd_struct mfm[MFM_MAXDRIVES << 6]; - /* Stuff from the assembly routines */ extern unsigned int hdc63463_baseaddress; /* Controller base address */ extern unsigned int hdc63463_irqpolladdress; /* Address to read to test for int */ @@ -888,14 +886,12 @@ static struct gendisk mfm_gendisk[2] = { .first_minor = 0, .major_name = "mfma", .minor_shift = 6, - .part = mfm, }, { .major = MAJOR_NR, .first_minor = 64, .major_name = "mfmb", .minor_shift = 6, - .part = mfm + 64, }; static void mfm_request(void) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 58415a08c41b..989f13e46e8f 100644 --- a/drivers/block/DAC960.c +++ 
b/drivers/block/DAC960.c @@ -1968,7 +1968,6 @@ static boolean DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) struct gendisk *disk = &Controller->disks[n]; memset(disk, 0, sizeof(struct gendisk)); sprintf(names + 9 * n, "rd/c%dd%d", Controller->ControllerNumber, n); - disk->part = Controller->DiskPartitions + (n<major = MajorNumber; disk->first_minor = n << DAC960_MaxPartitionsBits; disk->major_name = names + 9 * n; diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h index 53b4bcb47d5a..33b2822520e3 100644 --- a/drivers/block/DAC960.h +++ b/drivers/block/DAC960.h @@ -2196,7 +2196,6 @@ typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct completion Completion_T; typedef struct hd_geometry DiskGeometry_T; -typedef struct hd_struct DiskPartition_T; typedef struct inode Inode_T; typedef struct inode_operations InodeOperations_T; typedef kdev_t KernelDevice_T; @@ -2472,7 +2471,6 @@ typedef struct DAC960_Controller boolean LogicalDriveFoundDuringScan[DAC960_MaxLogicalDrives]; } V2; } FW; - DiskPartition_T DiskPartitions[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index 1197c8b3bb86..3313f7cb0a3c 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -245,7 +245,6 @@ char *acsi_buffer; unsigned long phys_acsi_buffer; static int NDevices; -static struct hd_struct acsi_part[MAX_DEV<<4]; static char acsi_names[MAX_DEV*4]; static int access_count[MAX_DEV]; @@ -1698,7 +1697,6 @@ static void acsi_geninit(void) disk->first_minor = i << 4; disk->major_name = acsi_names + 4*i; disk->minor_shift = (acsi_info[i].type==HARDDISK)?4:0; - disk->part = acsi_part + (i<<4); disk->fops = &acsi_fops; add_gendisk(disk); register_disk(disk, mk_kdev(disk->major, disk->first_minor), diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index 
9fbfaafbea25..941d1051d894 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -95,18 +95,18 @@ int add_partition(struct block_device *bdev, struct blkpg_partition *p) return -EINVAL; /* partition number in use? */ - if (g->part[p->pno].nr_sects != 0) + if (g->part[p->pno - 1].nr_sects != 0) return -EBUSY; /* overlap? */ - for (i = 1; i < (1<minor_shift); i++) + for (i = 0; i < (1<minor_shift) - 1; i++) if (!(pstart+plength <= g->part[i].start_sect || pstart >= g->part[i].start_sect + g->part[i].nr_sects)) return -EBUSY; /* all seems OK */ - g->part[p->pno].start_sect = pstart; - g->part[p->pno].nr_sects = plength; + g->part[p->pno - 1].start_sect = pstart; + g->part[p->pno - 1].nr_sects = plength; update_partition(g, p->pno); return 0; } @@ -138,7 +138,7 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p) return -EINVAL; /* existing drive and partition? */ - if (g->part[p->pno].nr_sects == 0) + if (g->part[p->pno - 1].nr_sects == 0) return -ENXIO; /* partition in use? Incomplete check for now. */ @@ -154,8 +154,8 @@ int del_partition(struct block_device *bdev, struct blkpg_partition *p) fsync_bdev(bdevp); invalidate_bdev(bdevp, 0); - g->part[p->pno].start_sect = 0; - g->part[p->pno].nr_sects = 0; + g->part[p->pno - 1].start_sect = 0; + g->part[p->pno - 1].nr_sects = 0; update_partition(g, p->pno); bd_release(bdevp); bdput(bdevp); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 969be47e8711..0b8fb9a5c7a5 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -750,7 +750,6 @@ static int revalidate_allvol(kdev_t dev) * Set the partition and block size structures for all volumes * on this controller to zero. 
We will reread all of this data */ - memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); /* @@ -2445,7 +2444,6 @@ static int __init cciss_init_one(struct pci_dev *pdev, disk->first_minor = j << NWD_SHIFT; disk->major_name = NULL; disk->minor_shift = NWD_SHIFT; - disk->part = hba[i]->hd + (j << NWD_SHIFT); if( !(drv->nr_blocks)) continue; (BLK_DEFAULT_QUEUE(MAJOR_NR + i))->hardsect_size = drv->block_size; diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 7192717a23fe..1baed4acef84 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -83,9 +83,6 @@ struct ctlr_info // Disk structures we need to pass back struct gendisk gendisk[NWD]; char names[12 * NWD]; - // indexed by minor numbers - struct hd_struct hd[256]; - int sizes[256]; #ifdef CONFIG_CISS_SCSI_TAPE void *scsi_ctlr; /* ptr to structure containing scsi related stuff */ #endif diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 2d6ff7620e46..aec445236fa0 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -102,7 +102,6 @@ static struct board_type products[] = { { 0x40580E11, "Smart Array 431", &smart4_access }, }; -static struct hd_struct * ida; static char *ida_names; static struct gendisk ida_gendisk[MAX_CTLR * NWD]; @@ -320,7 +319,6 @@ void cleanup_module(void) } devfs_find_and_unregister(NULL, "ida", 0, 0, 0, 0); remove_proc_entry("cpqarray", proc_root_driver); - kfree(ida); kfree(ida_names); } #endif /* MODULE */ @@ -346,15 +344,12 @@ int __init cpqarray_init(void) printk("Found %d controller(s)\n", nr_ctlr); /* allocate space for disk structs */ - ida = kmalloc(sizeof(struct hd_struct)*nr_ctlr*NWD*16, GFP_KERNEL); ida_names = kmalloc(nr_ctlr*NWD*10, GFP_KERNEL); - if (!ida || !ida_names) { + if (!ida_names) { printk( KERN_ERR "cpqarray: out of memory"); - kfree(ida); kfree(ida_names); return(num_cntlrs_reg); } - memset(ida, 0, sizeof(struct 
hd_struct)*nr_ctlr*NWD*16); /* * register block devices * Find disks and fill in structs @@ -407,7 +402,6 @@ int __init cpqarray_init(void) if (num_cntlrs_reg == 0) { - kfree(ida); kfree(ida_names); } return(num_cntlrs_reg); @@ -449,7 +443,6 @@ int __init cpqarray_init(void) disk->major = MAJOR_NR + i; disk->first_minor = j<minor_shift = NWD_SHIFT; - disk->part = ida + i*256 + (j<flags = GENHD_FL_DEVFS; disk->fops = &ida_fops; if (!drv->nr_blks) @@ -1462,7 +1455,6 @@ static int revalidate_allvol(kdev_t dev) del_gendisk(disk); disk->major_name = NULL; } - memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); /* diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 589ae73d7366..51b77476e95f 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -23,6 +23,7 @@ #include #include #include +#include static rwlock_t gendisk_lock; @@ -43,6 +44,19 @@ void add_gendisk(struct gendisk *gp) { struct gendisk *sgp; + struct hd_struct *p = NULL; + + if (gp->minor_shift) { + size_t size = sizeof(struct hd_struct)*((1<minor_shift)-1); + p = kmalloc(size, GFP_KERNEL); + if (!p) { + printk(KERN_ERR "out of memory; no partitions for %s\n", + gp->major_name); + gp->minor_shift = 0; + } else + memset(p, 0, size); + } + gp->part = p; write_lock(&gendisk_lock); @@ -148,13 +162,17 @@ static int show_partition(struct seq_file *part, void *v) seq_puts(part, "major minor #blocks name\n\n"); /* show the full disk and all non-0 size partitions of it */ - for (n = 0; n < 1<minor_shift; n++) { - if (n && sgp->part[n].nr_sects == 0) + seq_printf(part, "%4d %4d %10ld %s\n", + sgp->major, sgp->first_minor, + get_capacity(sgp) >> 1, + disk_name(sgp, 0, buf)); + for (n = 0; n < (1<minor_shift) - 1; n++) { + if (sgp->part[n].nr_sects == 0) continue; seq_printf(part, "%4d %4d %10ld %s\n", - sgp->major, n + sgp->first_minor, + sgp->major, n + 1 + sgp->first_minor, sgp->part[n].nr_sects >> 1 , - disk_name(sgp, n, buf)); + 
disk_name(sgp, n + 1, buf)); } return 0; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index bb9c23e6b364..66e3300028a2 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -271,8 +271,6 @@ static void pd_doorlock(int unit, int func); static int pd_check_media(kdev_t dev); static void pd_eject( int unit); -static struct hd_struct pd_hd[PD_DEVS]; - #define PD_NAMELEN 8 struct pd_unit { @@ -689,7 +687,6 @@ static int pd_detect( void ) PD.gd.fops = &pd_fops; PD.gd.major = major; PD.gd.first_minor = unit << PD_BITS; - PD.gd.part = pd_hd + (unit << PD_BITS); add_gendisk(&PD.gd); register_disk(&PD.gd,mk_kdev(MAJOR_NR,unit<part = mm_partitions + (i << MM_SHIFT); disk->major = major_nr; disk->first_minor = i << MM_SHIFT; disk->major_name = mm_names + i*6; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 9d6bf0ada74d..97ec536e4756 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -121,8 +121,6 @@ static unsigned int xd_bases[] __initdata = 0xE0000 }; -static struct hd_struct xd_struct[XD_MAXDRIVES << 6]; - static spinlock_t xd_lock = SPIN_LOCK_UNLOCKED; extern struct block_device_operations xd_fops; @@ -133,14 +131,12 @@ static struct gendisk xd_gendisk[2] = { .first_minor = 0, .major_name = "xda", .minor_shift = 6, - .part = xd_struct, .fops = &xd_fops, },{ .major = MAJOR_NR, .first_minor = 64, .major_name = "xdb", .minor_shift = 6, - .part = xd_struct + 64, .fops = &xd_fops, } }; diff --git a/drivers/ide/hd.c b/drivers/ide/hd.c index 1bf3df67d0dc..714e27ff87b1 100644 --- a/drivers/ide/hd.c +++ b/drivers/ide/hd.c @@ -145,8 +145,6 @@ static struct hd_i_struct hd_info[MAX_HD]; static int NR_HD; #endif -static struct hd_struct hd[MAX_HD<<6]; - static struct timer_list device_timer; #define TIMEOUT_VALUE (6*HZ) @@ -706,14 +704,12 @@ static struct gendisk hd_gendisk[2] = { .first_minor = 0, .major_name = "hda", .minor_shift = 6, - .part = hd, .fops = &hd_fops, },{ .major = MAJOR_NR, .first_minor = 64, .major_name = 
"hdb", .minor_shift = 6, - .part = hd + 64, .fops = &hd_fops, } }; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 47336c9d408e..f56203db719a 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -807,7 +807,6 @@ static int init_irq (ide_hwif_t *hwif) static void init_gendisk (ide_hwif_t *hwif) { struct gendisk *gd; - struct hd_struct *part; unsigned int unit, units, minors; extern devfs_handle_t ide_devfs_handle; char *names; @@ -828,20 +827,12 @@ static void init_gendisk (ide_hwif_t *hwif) goto err_kmalloc_gd; memset(gd, 0, MAX_DRIVES * sizeof(struct gendisk)); - part = kmalloc(minors * sizeof(struct hd_struct), GFP_KERNEL); - if (!part) - goto err_kmalloc_gd_part; - - memset(part, 0, minors * sizeof(struct hd_struct)); - names = kmalloc (4 * MAX_DRIVES, GFP_KERNEL); if (!names) goto err_kmalloc_gd_names; memset(names, 0, 4 * MAX_DRIVES); for (unit = 0; unit < units; ++unit) { - gd[unit].part = part + (unit << PARTN_BITS); - hwif->drives[unit].part = gd[unit].part; gd[unit].major = hwif->major; gd[unit].first_minor = unit << PARTN_BITS; sprintf(names + 4*unit, "hd%c",'a'+hwif->index*MAX_DRIVES+unit); @@ -877,8 +868,6 @@ static void init_gendisk (ide_hwif_t *hwif) return; err_kmalloc_gd_names: - kfree(part); -err_kmalloc_gd_part: kfree(gd); err_kmalloc_gd: printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n"); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index c053036cc289..8d37e3b3662f 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -2058,10 +2058,9 @@ void ide_unregister (unsigned int index) gd = hwif->drives[0].disk; if (gd) { int i; - kfree(gd->part); - kfree(gd); for (i = 0; i < MAX_DRIVES; i++) hwif->drives[i].disk = NULL; + kfree(gd); } old_hwif = *hwif; init_hwif_data (index); /* restore hwif data to pristine status */ diff --git a/drivers/md/md.c b/drivers/md/md.c index b903aa9d18e2..2df1961eb468 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -103,11 +103,6 @@ static ctl_table 
raid_root_table[] = { {0} }; -/* - * these have to be allocated separately because external - * subsystems want to have a pre-defined structure - */ -struct hd_struct md_hd_struct[MAX_MD_DEVS]; static void md_recover_arrays(void); static mdk_thread_t *md_recovery_thread; @@ -1458,7 +1453,6 @@ static int do_md_run(mddev_t * mddev) disk->minor_shift = 0; sprintf(major_name, "md%d", mdidx(mddev)); disk->major_name = major_name; - disk->part = md_hd_struct + mdidx(mddev); disk->fops = &md_fops; mddev->pers = pers[pnum]; @@ -3178,11 +3172,10 @@ int __init md_init(void) return (-1); } devfs_handle = devfs_mk_dir (NULL, "md", NULL); - /* we don't use devfs_register_series because we want to fill md_hd_struct */ for (minor=0; minor < MAX_MD_DEVS; ++minor) { char devname[128]; sprintf (devname, "%u", minor); - md_hd_struct[minor].de = devfs_register (devfs_handle, + devfs_register (devfs_handle, devname, DEVFS_FL_DEFAULT, MAJOR_NR, minor, S_IFBLK | S_IRUSR | S_IWUSR, &md_fops, NULL); } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 3928e81ada4d..ae7befdf574b 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -186,7 +186,6 @@ static struct i2ob_request *i2ob_backlog_tail[MAX_I2O_CONTROLLERS]; static struct i2ob_device i2ob_dev[MAX_I2OB<<4]; static int i2ob_dev_count = 0; -static struct hd_struct i2ob[MAX_I2OB<<4]; static struct gendisk i2o_disk[MAX_I2OB]; static char i2o_names[MAX_I2OB * 8]; @@ -1771,7 +1770,6 @@ int i2o_block_init(void) disk->major = MAJOR_NR; disk->first_minor = i<<4; disk->minor_shift = 4; - disk->part = i2ob + (i<<4); disk->fops = &i2ob_fops; disk->major_name = i2o_names + i*8; sprintf(disk->major_name, "i2o/hd%c", 'a' + i); diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 9b4758d19f12..4ddc0d9337ec 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -175,8 +175,6 @@ static struct mtd_notifier ftl_notifier = { #define XFER_PREPARED 0x03 #define XFER_FAILED 0x04 -static 
struct hd_struct ftl_hd[MINOR_NR(MAX_DEV, 0, 0)]; - /*====================================================================*/ static int ftl_ioctl(struct inode *inode, struct file *file, @@ -1252,7 +1250,6 @@ static void ftl_notify_add(struct mtd_info *mtd) disk->first_minor = device << 4; disk->major_name = name; disk->minor_shift = PART_BITS; - disk->part = ftl_hd + (device << 4); disk->fops = &ftl_blk_fops; partition->mtd = mtd; partition->disk = disk; diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 26be858d720b..c6fa25aeabf6 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -49,16 +49,6 @@ /* Linux-specific block device functions */ -/* I _HATE_ the Linux block device setup more than anything else I've ever - * encountered, except ... - */ - -/* .. for the Linux partition table handling. */ -/* So why didn't you fucking go and clean it up? -- AV */ -struct hd_struct part_table[256]; - -static struct block_device_operations nftl_fops; - struct NFTLrecord *NFTLs[MAX_NFTLS]; static void NFTL_setup(struct mtd_info *mtd) @@ -150,7 +140,6 @@ static void NFTL_setup(struct mtd_info *mtd) gd->major = MAJOR_NR; gd->first_minor = firstfree << NFTL_PARTN_BITS; gd->minor_shift = NFTL_PARTN_BITS; - gd->part = part_table + (firstfree << NFTL_PARTN_BITS); gd->major_name = name; nftl->disk = gd; add_gendisk(gd); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 700285728e59..6df129445724 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -35,7 +35,6 @@ struct major_info { int major; struct gendisk disks[DASD_PER_MAJOR]; char names[DASD_PER_MAJOR * 8]; - struct hd_struct part[1<minor_shift = DASD_PARTN_BITS; disk->fops = &dasd_device_operations; disk->flags = GENHD_FL_DEVFS; - disk->part = mi->part + (i << DASD_PARTN_BITS); } /* Setup block device pointers for the new major. 
*/ diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5bc2dd6e9517..356952ba39e9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -84,9 +84,6 @@ #define SD_DSK_ARR_LUMP 6 /* amount to over allocate sd_dsk_arr by */ - -struct hd_struct *sd; - static Scsi_Disk ** sd_dsk_arr; static rwlock_t sd_dsk_arr_lock = RW_LOCK_UNLOCKED; @@ -1195,12 +1192,10 @@ static int sd_init() init_mem_lth(sd_disks, sd_template.dev_max); if (sd_disks) zero_mem_lth(sd_disks, sd_template.dev_max); - init_mem_lth(sd, maxparts); - if (!sd_dsk_arr || !sd || !sd_disks) + if (!sd_dsk_arr || !sd_disks) goto cleanup_mem; - zero_mem_lth(sd, maxparts); return 0; #undef init_mem_lth @@ -1209,8 +1204,6 @@ static int sd_init() cleanup_mem: vfree(sd_disks); sd_disks = NULL; - vfree(sd); - sd = NULL; if (sd_dsk_arr) { for (k = 0; k < sd_template.dev_max; ++k) vfree(sd_dsk_arr[k]); @@ -1347,7 +1340,6 @@ static int sd_attach(Scsi_Device * sdp) gd->major = SD_MAJOR(dsk_nr>>4); gd->first_minor = (dsk_nr & 15)<<4; gd->minor_shift = 4; - gd->part = sd + (dsk_nr << 4); gd->fops = &sd_fops; if (dsk_nr > 26) sprintf(p->name, "sd%c%c", 'a'+dsk_nr/26-1, 'a'+dsk_nr%26); @@ -1465,7 +1457,6 @@ static void __exit exit_sd(void) vfree(sd_dsk_arr[k]); vfree(sd_dsk_arr); } - vfree((char *) sd); for (k = 0; k < N_USED_SD_MAJORS; k++) { blk_dev[SD_MAJOR(k)].queue = NULL; blk_clear(SD_MAJOR(k)); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index e3eb4be41a26..5ba16cfec18c 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -20,8 +20,6 @@ #include #endif -extern struct hd_struct *sd; - typedef struct scsi_disk { unsigned capacity; /* size in 512-byte sectors */ Scsi_Device *device; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index e201e29df2dd..4e68f16b14e7 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -744,31 +744,25 @@ void sr_finish() for (i = 0; i < sr_template.nr_dev; ++i) { struct gendisk *disk; - /* KLUDGE - will go away */ - struct { - struct gendisk disk; - struct hd_struct 
part; - } *p; Scsi_CD *cd = &scsi_CDs[i]; /* If we have already seen this, then skip it. Comes up * with loadable modules. */ if (cd->disk) continue; - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (!p) + disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + if (!disk) continue; if (cd->disk) { - kfree(p); + kfree(disk); continue; } - memset(p, 0, sizeof(*p)); - p->disk.part = &p->part; - p->disk.major = MAJOR_NR; - p->disk.first_minor = i; - p->disk.major_name = cd->cdi.name; - p->disk.minor_shift = 0; - p->disk.fops = &sr_bdops; - cd->disk = disk = &p->disk; + memset(disk, 0, sizeof(struct gendisk)); + disk->major = MAJOR_NR; + disk->first_minor = i; + disk->minor_shift = 0; + disk->major_name = cd->cdi.name; + disk->fops = &sr_bdops; + cd->disk = disk; cd->capacity = 0x1fffff; cd->device->sector_size = 2048;/* A guess, just in case */ cd->needs_sector_size = 1; diff --git a/fs/block_dev.c b/fs/block_dev.c index 30e46a931b2f..b4cdee9bf538 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -503,7 +503,6 @@ int check_disk_change(struct block_device *bdev) struct block_device_operations * bdops = bdev->bd_op; kdev_t dev = to_kdev_t(bdev->bd_dev); struct gendisk *disk; - struct hd_struct *part; if (bdops->check_media_change == NULL) return 0; @@ -517,7 +516,6 @@ int check_disk_change(struct block_device *bdev) printk("VFS: busy inodes on changed media.\n"); disk = get_gendisk(dev); - part = disk->part + minor(dev) - disk->first_minor; if (bdops->revalidate) bdops->revalidate(dev); if (disk && disk->minor_shift) @@ -632,11 +630,9 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * sector_t sect = 0; bdev->bd_offset = 0; - if (g) { - struct hd_struct *p; - p = g->part + minor(dev) - g->first_minor; - sect = p->nr_sects; - } else if (blk_size[major(dev)]) + if (g) + sect = get_capacity(g); + else if (blk_size[major(dev)]) sect = blk_size[major(dev)][minor(dev)] << 1; bd_set_size(bdev, (loff_t)sect << 9); bdi = blk_get_backing_dev_info(bdev); 
@@ -653,7 +649,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * if (!bdev->bd_openers) { struct gendisk *g = get_gendisk(dev); struct hd_struct *p; - p = g->part + minor(dev) - g->first_minor; + p = g->part + minor(dev) - g->first_minor - 1; inode->i_data.backing_dev_info = bdev->bd_inode->i_data.backing_dev_info = bdev->bd_contains->bd_inode->i_data.backing_dev_info; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 67ce47f76946..75f096ff1e9a 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -89,19 +89,25 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = char *disk_name(struct gendisk *hd, int part, char *buf) { - if (part < 1<minor_shift && hd->part[part].de) { - int pos; - - pos = devfs_generate_path(hd->part[part].de, buf, 64); - if (pos >= 0) - return buf + pos; - } - if (!part) + int pos; + if (!part) { + if (hd->disk_de) { + pos = devfs_generate_path(hd->disk_de, buf, 64); + if (pos >= 0) + return buf + pos; + } sprintf(buf, "%s", hd->major_name); - else if (isdigit(hd->major_name[strlen(hd->major_name)-1])) - sprintf(buf, "%sp%d", hd->major_name, part); - else - sprintf(buf, "%s%d", hd->major_name, part); + } else { + if (hd->part[part-1].de) { + pos = devfs_generate_path(hd->part[part-1].de, buf, 64); + if (pos >= 0) + return buf + pos; + } + if (isdigit(hd->major_name[strlen(hd->major_name)-1])) + sprintf(buf, "%sp%d", hd->major_name, part); + else + sprintf(buf, "%s%d", hd->major_name, part); + } return buf; } @@ -144,21 +150,29 @@ static void driverfs_create_partitions(struct gendisk *hd) *name = *bus_id = '\0'; } - dev = &p[0].hd_driverfs_dev; + dev = &hd->disk_dev; dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor); sprintf(dev->name, "%sdisc", name); sprintf(dev->bus_id, "%sdisc", bus_id); for (part=1; part < max_p; part++) { - dev = &p[part].hd_driverfs_dev; + dev = &p[part-1].hd_driverfs_dev; sprintf(dev->name, "%spart%d", name, 
part); sprintf(dev->bus_id, "%s:p%d", bus_id, part); - if (!p[part].nr_sects) + if (!p[part-1].nr_sects) continue; dev->driver_data = (void *)(long)__mkdev(hd->major, hd->first_minor+part); } - for (part=0; part < max_p; part++) { + dev = &hd->disk_dev; + dev->parent = parent; + if (parent) + dev->bus = parent->bus; + device_register(dev); + device_create_file(dev, &dev_attr_type); + device_create_file(dev, &dev_attr_kdev); + + for (part=0; part < max_p-1; part++) { dev = &p[part].hd_driverfs_dev; dev->parent = parent; if (parent) @@ -174,11 +188,12 @@ static void driverfs_create_partitions(struct gendisk *hd) static void driverfs_remove_partitions(struct gendisk *hd) { int max_p = 1<minor_shift; + struct device *dev; struct hd_struct *p; int part; - for (part=0, p = hd->part; part < max_p; part++, p++) { - struct device *dev = &p->hd_driverfs_dev; + for (part=1, p = hd->part; part < max_p; part++, p++) { + dev = &p->hd_driverfs_dev; if (dev->driver_data) { device_remove_file(dev, &dev_attr_type); device_remove_file(dev, &dev_attr_kdev); @@ -186,6 +201,13 @@ static void driverfs_remove_partitions(struct gendisk *hd) dev->driver_data = NULL; } } + dev = &hd->disk_dev; + if (dev->driver_data) { + device_remove_file(dev, &dev_attr_type); + device_remove_file(dev, &dev_attr_kdev); + put_device(dev); + dev->driver_data = NULL; + } } static void check_partition(struct gendisk *hd, struct block_device *bdev) @@ -227,10 +249,10 @@ static void check_partition(struct gendisk *hd, struct block_device *bdev) } p = hd->part; for (j = 1; j < state->limit; j++) { - p[j].start_sect = state->parts[j].from; - p[j].nr_sects = state->parts[j].size; + p[j-1].start_sect = state->parts[j].from; + p[j-1].nr_sects = state->parts[j].size; #if CONFIG_BLK_DEV_MD - if (!state->parts[j].flags) + if (!state->parts[j-1].flags) continue; md_autodetect_dev(dev+j); #endif @@ -248,15 +270,15 @@ static void devfs_register_partition(struct gendisk *dev, int part) struct hd_struct *p = dev->part; char 
devname[16]; - if (p[part].de) + if (p[part-1].de) return; - dir = devfs_get_parent(p[0].de); + dir = devfs_get_parent(dev->disk_de); if (!dir) return; if (dev->flags & GENHD_FL_REMOVABLE) devfs_flags |= DEVFS_FL_REMOVABLE; sprintf(devname, "part%d", part); - p[part].de = devfs_register (dir, devname, devfs_flags, + p[part-1].de = devfs_register (dir, devname, devfs_flags, dev->major, dev->first_minor + part, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); @@ -300,10 +322,10 @@ static void devfs_create_partitions(struct gendisk *dev) sprintf(symlink, "disc%d", dev->number); devfs_mk_symlink (devfs_handle, symlink, DEVFS_FL_DEFAULT, dirname + pos, &slave, NULL); - p->de = devfs_register(dir, "disc", devfs_flags, + dev->disk_de = devfs_register(dir, "disc", devfs_flags, dev->major, dev->first_minor, S_IFBLK | S_IRUSR | S_IWUSR, dev->fops, NULL); - devfs_auto_unregister(p->de, slave); + devfs_auto_unregister(dev->disk_de, slave); if (!(dev->flags & GENHD_FL_DEVFS)) devfs_auto_unregister (slave, dir); for (part = 1, p++; part < max_p; part++, p++) @@ -316,10 +338,12 @@ static void devfs_remove_partitions(struct gendisk *dev) { #ifdef CONFIG_DEVFS_FS int part; - for (part = 1<minor_shift; part--; ) { + for (part = (1<minor_shift)-1; part--; ) { devfs_unregister(dev->part[part].de); dev->part[part].de = NULL; } + devfs_unregister(dev->disk_de); + dev->disk_de = NULL; devfs_dealloc_unique_number(&disc_numspace, dev->number); #endif } @@ -363,7 +387,7 @@ void register_disk(struct gendisk *disk, kdev_t dev, unsigned minors, void update_partition(struct gendisk *disk, int part) { - struct hd_struct *p = disk->part + part; + struct hd_struct *p = disk->part + part - 1; struct device *dev = &p->hd_driverfs_dev; if (!p->nr_sects) { @@ -402,7 +426,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (res) return res; bdev->bd_invalidated = 0; - for (p = 1; p < (1<minor_shift); p++) { + for (p = 0; p < (1<minor_shift) - 1; p++) { disk->part[p].start_sect 
= 0; disk->part[p].nr_sects = 0; } @@ -441,22 +465,32 @@ fail: static int wipe_partitions(struct gendisk *disk) { int max_p = 1 << disk->minor_shift; + kdev_t devp; + int res; int p; /* invalidate stuff */ - for (p = max_p - 1; p >= 0; p--) { - kdev_t devp = mk_kdev(disk->major,disk->first_minor + p); - int res; + for (p = max_p - 1; p > 0; p--) { + devp = mk_kdev(disk->major,disk->first_minor + p); #if 0 /* %%% superfluous? */ - if (disk->part[p].nr_sects == 0) + if (disk->part[p-1].nr_sects == 0) continue; #endif res = invalidate_device(devp, 1); if (res) return res; - disk->part[p].start_sect = 0; - disk->part[p].nr_sects = 0; + disk->part[p-1].start_sect = 0; + disk->part[p-1].nr_sects = 0; } + devp = mk_kdev(disk->major,disk->first_minor); +#if 0 /* %%% superfluous? */ + if (disk->part[p].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + disk->capacity = 0; return 0; } @@ -466,4 +500,8 @@ void del_gendisk(struct gendisk *disk) wipe_partitions(disk); unlink_gendisk(disk); devfs_remove_partitions(disk); + if (disk->part) { + kfree(disk->part); + disk->part = NULL; + } } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0bd31339d6f5..ecd747fbe569 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -79,11 +79,14 @@ struct gendisk { struct hd_struct *part; /* [indexed by minor] */ struct gendisk *next; struct block_device_operations *fops; + sector_t capacity; - devfs_handle_t de; - struct device *driverfs_dev; int flags; int number; /* devfs crap */ + devfs_handle_t de; /* more of the same */ + devfs_handle_t disk_de; /* piled higher and deeper */ + struct device *driverfs_dev; + struct device disk_dev; }; /* drivers/block/genhd.c */ @@ -97,11 +100,11 @@ static inline unsigned long get_start_sect(struct block_device *bdev) } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->part[0].nr_sects; + return disk->capacity; } static inline void set_capacity(struct gendisk 
*disk, sector_t size) { - disk->part[0].nr_sects = size; + disk->capacity = size; } #endif /* __KERNEL__ */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 501b97902799..936a8eb9132d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -508,7 +508,6 @@ typedef struct ide_drive_s { struct hwif_s *hwif; /* actually (ide_hwif_t *) */ wait_queue_head_t wqueue; /* used to wait for drive in open() */ struct hd_driveid *id; /* drive model identification info */ - struct hd_struct *part; /* drive partition table */ char name[4]; /* drive name, such as "hda" */ struct ide_driver_s *driver; /* (ide_driver_t *) */ void *driver_data; /* extra driver data */ diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index a9cca6e4da8f..f95cdf658a39 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -61,7 +61,6 @@ #define MD_PATCHLEVEL_VERSION 0 extern int md_size[MAX_MD_DEVS]; -extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; extern char * partition_name (kdev_t dev); extern inline char * bdev_partition_name (struct block_device *bdev) -- cgit v1.2.3 From 36780249955396f8c9a76192f360ac3a8e9d1e01 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Sat, 7 Sep 2002 18:23:34 -0700 Subject: [PATCH] pci bus resources, transparent bridges Added PCI_BUS_NUM_RESOURCES as Ben suggested. Default value is 4 and can be overridden by arch (probably in asm/system.h). pci_read_bridge_bases() and pci_assign_bus_resource() changed accordingly. "for (i = 0 ; i < 4; i++)" in pci_add_new_bus() not changed, as it's used _only_ for pci-pci and cardbus bridges. 
--- arch/i386/pci/fixup.c | 17 +++++++++++++++++ drivers/pci/probe.c | 26 +++++++++++--------------- drivers/pci/quirks.c | 12 ++++++++++++ drivers/pci/setup-res.c | 2 +- include/linux/pci.h | 10 +++++++++- 5 files changed, 50 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 7165d0fd549f..ca5e8fa1b9eb 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -166,6 +166,22 @@ static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d) } } +/* + * For some reasons Intel decided that certain parts of their + * 815, 845 and some other chipsets must look like PCI-to-PCI bridges + * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, + * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according + * to Intel terminology. These devices do forward all addresses from + * system to PCI bus no matter what are their window settings, so they are + * "transparent" (or subtractive decoding) from programmers point of view. 
+ */ +static void __init pci_fixup_transparent_bridge(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (dev->device & 0xff00) == 0x2400) + dev->transparent = 1; +} + struct pci_fixup pcibios_fixups[] = { { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx }, @@ -183,5 +199,6 @@ struct pci_fixup pcibios_fixups[] = { { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge }, { 0 } }; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 060b94008a15..6460e5f08eda 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -128,6 +128,13 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) if (!dev) /* It's a host bus, nothing to read */ return; + if (dev->transparent) { + printk("Transparent bridge - %s\n", dev->name); + for(i = 0; i < PCI_BUS_NUM_RESOURCES; i++) + child->resource[i] = child->parent->resource[i]; + return; + } + for(i=0; i<3; i++) child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i]; @@ -149,13 +156,6 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO; res->start = base; res->end = limit + 0xfff; - } else { - /* - * Ugh. We don't know enough about this bridge. Just assume - * that it's entirely transparent. 
- */ - printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 0); - child->resource[0] = child->parent->resource[0]; } res = child->resource[1]; @@ -167,10 +167,6 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; res->end = limit + 0xfffff; - } else { - /* See comment above. Same thing */ - printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 1); - child->resource[1] = child->parent->resource[1]; } res = child->resource[2]; @@ -197,10 +193,6 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; res->start = base; res->end = limit + 0xfffff; - } else { - /* See comments above */ - printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 2); - child->resource[2] = child->parent->resource[2]; } } @@ -389,6 +381,10 @@ int pci_setup_device(struct pci_dev * dev) case PCI_HEADER_TYPE_BRIDGE: /* bridge header */ if (class != PCI_CLASS_BRIDGE_PCI) goto bad; + /* The PCI-to-PCI bridge spec requires that subtractive + decoding (i.e. transparent) bridge must have programming + interface code of 0x01. */ + dev->transparent = ((class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); break; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e66a3237f252..43aea3183d77 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -471,6 +471,11 @@ static void __init quirk_dunord ( struct pci_dev * dev ) r -> end = 0xffffff; } +static void __init quirk_transparent_bridge(struct pci_dev *dev) +{ + dev->transparent = 1; +} + /* * The main table of quirks. 
*/ @@ -525,6 +530,13 @@ static struct pci_fixup pci_fixups[] __initdata = { { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic }, { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering }, + /* + * i82380FB mobile docking controller: its PCI-to-PCI bridge + * is subtractive decoding (transparent), and does indicate this + * in the ProgIf. Unfortunately, the ProgIf value is wrong - 0x80 + * instead of 0x01. + */ + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82380FB, quirk_transparent_bridge }, { 0 } }; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 33efcd5717e9..da3eb2e5f5cc 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -73,7 +73,7 @@ static int pci_assign_bus_resource(const struct pci_bus *bus, int i; type_mask |= IORESOURCE_IO | IORESOURCE_MEM; - for (i = 0 ; i < 4; i++) { + for (i = 0 ; i < PCI_BUS_NUM_RESOURCES; i++) { struct resource *r = bus->resource[i]; if (!r) continue; diff --git a/include/linux/pci.h b/include/linux/pci.h index 3c76341f02bf..b82ec8e41174 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -386,6 +386,9 @@ struct pci_dev { int ro; /* ISAPnP: read only */ unsigned short regs; /* ISAPnP: supported registers */ + /* These fields are used by common fixups */ + unsigned short transparent:1; /* Transparent PCI bridge */ + int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */ int (*activate)(struct pci_dev *dev); int (*deactivate)(struct pci_dev *dev); @@ -406,6 +409,10 @@ struct pci_dev { #define PCI_ROM_RESOURCE 6 #define PCI_BRIDGE_RESOURCES 7 #define PCI_NUM_RESOURCES 11 + +#ifndef PCI_BUS_NUM_RESOURCES +#define PCI_BUS_NUM_RESOURCES 4 +#endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -415,7 +422,8 @@ struct pci_bus { struct list_head children; /* list of child buses */ struct list_head devices; /* list of devices on this bus */ struct pci_dev 
*self; /* bridge device as seen by parent */ - struct resource *resource[4]; /* address space routed to this bus */ + struct resource *resource[PCI_BUS_NUM_RESOURCES]; + /* address space routed to this bus */ struct pci_ops *ops; /* configuration access functions */ void *sysdata; /* hook for sys-specific extension */ -- cgit v1.2.3 From 6dfc88977e42a9d47c8e0c450c879610107bee36 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 7 Sep 2002 21:13:50 -0700 Subject: [PATCH] shared thread signals Support POSIX compliant thread signals on a kernel level with usable debugging (broadcast SIGSTOP, SIGCONT) and thread group management (broadcast SIGKILL), plus to load-balance 'process' signals between threads for better signal performance. Changes: - POSIX thread semantics for signals there are 7 'types' of actions a signal can take: specific, load-balance, kill-all, kill-all+core, stop-all, continue-all and ignore. Depending on the POSIX specifications each signal has one of the types defined for both the 'handler defined' and the 'handler not defined (kernel default)' case. 
Here is the table:

----------------------------------------------------------
|                    |  userspace       |  kernel        |
----------------------------------------------------------
|  SIGHUP            |  load-balance    |  kill-all      |
|  SIGINT            |  load-balance    |  kill-all      |
|  SIGQUIT           |  load-balance    |  kill-all+core |
|  SIGILL            |  specific        |  kill-all+core |
|  SIGTRAP           |  specific        |  kill-all+core |
|  SIGABRT/SIGIOT    |  specific        |  kill-all+core |
|  SIGBUS            |  specific        |  kill-all+core |
|  SIGFPE            |  specific        |  kill-all+core |
|  SIGKILL           |  n/a             |  kill-all      |
|  SIGUSR1           |  load-balance    |  kill-all      |
|  SIGSEGV           |  specific        |  kill-all+core |
|  SIGUSR2           |  load-balance    |  kill-all      |
|  SIGPIPE           |  specific        |  kill-all      |
|  SIGALRM           |  load-balance    |  kill-all      |
|  SIGTERM           |  load-balance    |  kill-all      |
|  SIGCHLD           |  load-balance    |  ignore        |
|  SIGCONT           |  specific        |  continue-all  |
|  SIGSTOP           |  n/a             |  stop-all      |
|  SIGTSTP           |  load-balance    |  stop-all      |
|  SIGTTIN           |  load-balance    |  stop-all      |
|  SIGTTOU           |  load-balance    |  stop-all      |
|  SIGURG            |  load-balance    |  ignore        |
|  SIGXCPU           |  specific        |  kill-all+core |
|  SIGXFSZ           |  specific        |  kill-all+core |
|  SIGVTALRM         |  load-balance    |  kill-all      |
|  SIGPROF           |  specific        |  kill-all      |
|  SIGPOLL/SIGIO     |  load-balance    |  kill-all      |
|  SIGSYS/SIGUNUSED  |  specific        |  kill-all+core |
|  SIGSTKFLT         |  specific        |  kill-all      |
|  SIGWINCH          |  load-balance    |  ignore        |
|  SIGPWR            |  load-balance    |  kill-all      |
|  SIGRTMIN-SIGRTMAX |  load-balance    |  kill-all      |
----------------------------------------------------------

as you can see from the table, signals that have handlers defined never get broadcast - they are either specific or load-balanced. - CLONE_THREAD implies CLONE_SIGHAND It does not make much sense to have a thread group that does not share signal handlers. In fact in the patch i'm using the signal spinlock to lock access to the thread group. I made the siglock IRQ-safe, thus we can load-balance signals from interrupt contexts as well. (we cannot take the tasklist lock in write mode from IRQ handlers.)
this is not as clean as i'd like it to be, but it's the best i could come up with so far. - thread group list management reworked. a thread is now removed from the group when it is unhashed from the PID table. This makes the most sense. This also helps with another feature that relies on an intact thread group list: multithreaded coredumps. - child reparenting reworked. the O(N) algorithm in forget_original_parent() causes massive performance problems if a large number of threads exit from the group. Performance improves more than 10-fold if the following simple rules are followed instead: - reparent children to the *previous* thread [exiting or not] - if a thread is detached then reparent to init. - fast broadcasting of kernel-internal SIGSTOP, SIGCONT, SIGKILL, etc. kernel-internal broadcast signals are a potential DoS problem, since they might generate massive amounts of GFP_ATOMIC allocations of siginfo structures. The important thing to note is that the siginfo structure does not actually have to be allocated and queued - the signal processing code has all the information it needs; none of these signals carries any information in the siginfo structure. This makes a broadcast SIGKILL a very simple operation: all threads get bit 9 set in their pending bitmask. The speedup due to this was significant - and the robustness win is invaluable. - sys_execve() should not kill off 'all other' threads. the 'exec kills all threads if the master thread does the exec()' behaviour is a POSIX(-ish) thing that should not be hardcoded in the kernel in this case. to handle POSIX exec() semantics, glibc uses a special syscall, which kills 'all but self' threads: sys_exit_allbutself(). the straightforward exec() implementation just calls sys_exit_allbutself() and then sys_execve(). (this syscall is also used internally if the thread group leader thread sys_exit()s or sys_exec()s, to ensure the integrity of the thread group.)
--- fs/exec.c | 47 +--- include/asm-i386/spinlock.h | 2 + include/linux/sched.h | 53 +++-- kernel/exit.c | 25 +-- kernel/fork.c | 11 + kernel/sched.c | 9 + kernel/signal.c | 537 ++++++++++++++++++++++++++++++++++---------- 7 files changed, 491 insertions(+), 193 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 769380e3899a..9c00b733fcee 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -504,6 +504,8 @@ static inline int make_private_signals(void) { struct signal_struct * newsig; + remove_thread_group(current, current->sig); + if (atomic_read(¤t->sig->count) <= 1) return 0; newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL); @@ -575,42 +577,10 @@ static inline void flush_old_files(struct files_struct * files) */ static void de_thread(struct task_struct *tsk) { - struct task_struct *sub; - struct list_head *head, *ptr; - struct siginfo info; - int pause; - - write_lock_irq(&tasklist_lock); - - if (tsk->tgid != tsk->pid) { - /* subsidiary thread - just escapes the group */ - list_del_init(&tsk->thread_group); - tsk->tgid = tsk->pid; - pause = 0; - } - else { - /* master thread - kill all subsidiary threads */ - info.si_signo = SIGKILL; - info.si_errno = 0; - info.si_code = SI_DETHREAD; - info.si_pid = current->pid; - info.si_uid = current->uid; - - head = tsk->thread_group.next; - list_del_init(&tsk->thread_group); - - list_for_each(ptr,head) { - sub = list_entry(ptr,struct task_struct,thread_group); - send_sig_info(SIGKILL,&info,sub); - } - - pause = 1; - } - - write_unlock_irq(&tasklist_lock); - - /* give the subsidiary threads a chance to clean themselves up */ - if (pause) yield(); + if (!list_empty(&tsk->thread_group)) + BUG(); + /* An exec() starts a new thread group: */ + tsk->tgid = tsk->pid; } int flush_old_exec(struct linux_binprm * bprm) @@ -633,6 +603,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto mmap_failed; /* This is the point of no return */ + de_thread(current); + release_old_signals(oldsig); 
current->sas_ss_sp = current->sas_ss_size = 0; @@ -651,9 +623,6 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); - if (!list_empty(¤t->thread_group)) - de_thread(current); - if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || permission(bprm->file->f_dentry->d_inode,MAY_READ)) current->mm->dumpable = 0; diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index d565c78c2d81..d26bf652d894 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -158,6 +158,8 @@ typedef struct { #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) +#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS) + /* * On x86, we implement read-write locks as a 32-bit counter * with the high bit (sign) being the "contended" bit. diff --git a/include/linux/sched.h b/include/linux/sched.h index 896b7f59941c..bd7073fdefaf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -211,6 +211,11 @@ struct signal_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; + + /* current thread group signal load-balancing target: */ + task_t *curr_target; + + struct sigpending shared_pending; }; /* @@ -356,7 +361,7 @@ struct task_struct { spinlock_t sigmask_lock; /* Protects signal and blocked */ struct signal_struct *sig; - sigset_t blocked; + sigset_t blocked, real_blocked, shared_unblocked; struct sigpending pending; unsigned long sas_ss_sp; @@ -431,6 +436,7 @@ extern void set_cpus_allowed(task_t *p, unsigned long new_mask); extern void set_user_nice(task_t *p, long nice); extern int task_prio(task_t *p); extern int task_nice(task_t *p); +extern int task_curr(task_t *p); extern int idle_cpu(int cpu); void yield(void); @@ -535,7 +541,7 @@ extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *); extern void sig_exit(int, int, struct siginfo *); -extern int dequeue_signal(sigset_t *, siginfo_t *); +extern 
int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info); extern void block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask); extern void unblock_all_signals(void); @@ -654,6 +660,7 @@ extern void exit_thread(void); extern void exit_mm(struct task_struct *); extern void exit_files(struct task_struct *); extern void exit_sighand(struct task_struct *); +extern void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig); extern void reparent_to_init(void); extern void daemonize(void); @@ -786,8 +793,29 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) #define for_each_thread(task) \ for (task = next_thread(current) ; task != current ; task = next_thread(task)) -#define next_thread(p) \ - list_entry((p)->thread_group.next, struct task_struct, thread_group) +static inline task_t *next_thread(task_t *p) +{ + if (!p->sig) + BUG(); +#if CONFIG_SMP + if (!spin_is_locked(&p->sig->siglock) && + !rwlock_is_locked(&tasklist_lock)) + BUG(); +#endif + return list_entry((p)->thread_group.next, task_t, thread_group); +} + +static inline task_t *prev_thread(task_t *p) +{ + if (!p->sig) + BUG(); +#if CONFIG_SMP + if (!spin_is_locked(&p->sig->siglock) && + !rwlock_is_locked(&tasklist_lock)) + BUG(); +#endif + return list_entry((p)->thread_group.prev, task_t, thread_group); +} #define thread_group_leader(p) (p->pid == p->tgid) @@ -903,21 +931,8 @@ static inline void cond_resched(void) This is required every time the blocked sigset_t changes. Athread cathreaders should have t->sigmask_lock. 
*/ -static inline void recalc_sigpending_tsk(struct task_struct *t) -{ - if (has_pending_signals(&t->pending.signal, &t->blocked)) - set_tsk_thread_flag(t, TIF_SIGPENDING); - else - clear_tsk_thread_flag(t, TIF_SIGPENDING); -} - -static inline void recalc_sigpending(void) -{ - if (has_pending_signals(¤t->pending.signal, ¤t->blocked)) - set_thread_flag(TIF_SIGPENDING); - else - clear_thread_flag(TIF_SIGPENDING); -} +extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t)); +extern void recalc_sigpending(void); /* * Wrappers for p->thread_info->cpu access. No-op on UP. diff --git a/kernel/exit.c b/kernel/exit.c index 7fe43d30ef87..94c935ae59a8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -36,7 +36,6 @@ static inline void __unhash_process(struct task_struct *p) nr_threads--; unhash_pid(p); REMOVE_LINKS(p); - list_del(&p->thread_group); p->pid = 0; proc_dentry = p->proc_dentry; if (unlikely(proc_dentry != NULL)) { @@ -73,6 +72,7 @@ static void release_task(struct task_struct * p) } BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); unhash_process(p); + exit_sighand(p); release_thread(p); if (p != current) { @@ -244,7 +244,8 @@ void daemonize(void) static void reparent_thread(task_t *p, task_t *reaper, task_t *child_reaper) { /* We dont want people slaying init */ - p->exit_signal = SIGCHLD; + if (p->exit_signal != -1) + p->exit_signal = SIGCHLD; p->self_exec_id++; /* Make sure we're not reparenting to ourselves */ @@ -412,18 +413,15 @@ void exit_mm(struct task_struct *tsk) */ static inline void forget_original_parent(struct task_struct * father) { - struct task_struct *p, *reaper; + struct task_struct *p, *reaper = father; struct list_head *_p; - read_lock(&tasklist_lock); + write_lock_irq(&tasklist_lock); - /* Next in our thread group, if they're not already exiting */ - reaper = father; - do { - reaper = next_thread(reaper); - if (!(reaper->flags & PF_EXITING)) - break; - } while (reaper != father); + if (father->exit_signal != 
-1) + reaper = prev_thread(reaper); + else + reaper = child_reaper; if (reaper == father) reaper = child_reaper; @@ -444,7 +442,7 @@ static inline void forget_original_parent(struct task_struct * father) p = list_entry(_p,struct task_struct,ptrace_list); reparent_thread(p, reaper, child_reaper); } - read_unlock(&tasklist_lock); + write_unlock_irq(&tasklist_lock); } static inline void zap_thread(task_t *p, task_t *father, int traced) @@ -604,7 +602,6 @@ fake_volatile: __exit_files(tsk); __exit_fs(tsk); exit_namespace(tsk); - exit_sighand(tsk); exit_thread(); if (current->leader) @@ -763,6 +760,8 @@ repeat: if (options & __WNOTHREAD) break; tsk = next_thread(tsk); + if (tsk->sig != current->sig) + BUG(); } while (tsk != current); read_unlock(&tasklist_lock); if (flag) { diff --git a/kernel/fork.c b/kernel/fork.c index 7166c99e9f9c..4e369b9734b6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -630,6 +630,9 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t spin_lock_init(&sig->siglock); atomic_set(&sig->count, 1); memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action)); + sig->curr_target = NULL; + init_sigpending(&sig->shared_pending); + return 0; } @@ -664,6 +667,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); + /* + * Thread groups must share signals as well: + */ + if (clone_flags & CLONE_THREAD) + clone_flags |= CLONE_SIGHAND; + retval = security_ops->task_create(clone_flags); if (retval) goto fork_out; @@ -843,8 +852,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { + spin_lock(¤t->sig->siglock); p->tgid = current->tgid; list_add(&p->thread_group, ¤t->thread_group); + spin_unlock(¤t->sig->siglock); } SET_LINKS(p); diff --git a/kernel/sched.c b/kernel/sched.c index e1e08ecd37b4..625acc09ec17 100644 --- 
a/kernel/sched.c +++ b/kernel/sched.c @@ -1335,6 +1335,15 @@ int task_nice(task_t *p) return TASK_NICE(p); } +/** + * task_curr - is this task currently executing on a CPU? + * @p: the task in question. + */ +int task_curr(task_t *p) +{ + return cpu_curr(task_cpu(p)) == p; +} + /** * idle_cpu - is a given cpu idle currently? * @cpu: the processor in question. diff --git a/kernel/signal.c b/kernel/signal.c index 8de729b6ee20..5ca0ca83eb2f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -6,6 +6,8 @@ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ +#define __KERNEL_SYSCALLS__ + #include #include #include @@ -16,7 +18,7 @@ #include #include #include - +#include #include #include @@ -24,40 +26,146 @@ * SLAB caches for signal bits. */ -#define DEBUG_SIG 0 - -#if DEBUG_SIG -#define SIG_SLAB_DEBUG (SLAB_RED_ZONE /* | SLAB_POISON */) -#else -#define SIG_SLAB_DEBUG 0 -#endif - static kmem_cache_t *sigqueue_cachep; atomic_t nr_queued_signals; int max_queued_signals = 1024; +/********************************************************* + + POSIX thread group signal behavior: + +---------------------------------------------------------- +| | userspace | kernel | +---------------------------------------------------------- +| SIGHUP | load-balance | kill-all | +| SIGINT | load-balance | kill-all | +| SIGQUIT | load-balance | kill-all+core | +| SIGILL | specific | kill-all+core | +| SIGTRAP | specific | kill-all+core | +| SIGABRT/SIGIOT | specific | kill-all+core | +| SIGBUS | specific | kill-all+core | +| SIGFPE | specific | kill-all+core | +| SIGKILL | n/a | kill-all | +| SIGUSR1 | load-balance | kill-all | +| SIGSEGV | specific | kill-all+core | +| SIGUSR2 | load-balance | kill-all | +| SIGPIPE | specific | kill-all | +| SIGALRM | load-balance | kill-all | +| SIGTERM | load-balance | kill-all | +| SIGCHLD | load-balance | ignore | +| SIGCONT | specific | continue-all | +| SIGSTOP | n/a | stop-all | +| SIGTSTP | load-balance | stop-all | +| SIGTTIN | 
load-balance | stop-all | +| SIGTTOU | load-balance | stop-all | +| SIGURG | load-balance | ignore | +| SIGXCPU | specific | kill-all+core | +| SIGXFSZ | specific | kill-all+core | +| SIGVTALRM | load-balance | kill-all | +| SIGPROF | specific | kill-all | +| SIGPOLL/SIGIO | load-balance | kill-all | +| SIGSYS/SIGUNUSED | specific | kill-all+core | +| SIGSTKFLT | specific | kill-all | +| SIGWINCH | load-balance | ignore | +| SIGPWR | load-balance | kill-all | +| SIGRTMIN-SIGRTMAX | load-balance | kill-all | +---------------------------------------------------------- +*/ + +#define M(sig) (1UL << (sig)) + +#define SIG_USER_SPECIFIC_MASK (\ + M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | M(SIGBUS) | \ + M(SIGFPE) | M(SIGSEGV) | M(SIGPIPE) | M(SIGXFSZ) | \ + M(SIGPROF) | M(SIGSYS) | M(SIGSTKFLT) | M(SIGCONT) ) + +#define SIG_USER_LOAD_BALANCE_MASK (\ + M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGUSR1) | \ + M(SIGUSR2) | M(SIGALRM) | M(SIGTERM) | M(SIGCHLD) | \ + M(SIGURG) | M(SIGVTALRM) | M(SIGPOLL) | M(SIGWINCH) | \ + M(SIGPWR) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) ) + +#define SIG_KERNEL_SPECIFIC_MASK (\ + M(SIGCHLD) | M(SIGURG) | M(SIGWINCH) ) + +#define SIG_KERNEL_BROADCAST_MASK (\ + M(SIGHUP) | M(SIGINT) | M(SIGQUIT) | M(SIGILL) | \ + M(SIGTRAP) | M(SIGABRT) | M(SIGBUS) | M(SIGFPE) | \ + M(SIGKILL) | M(SIGUSR1) | M(SIGSEGV) | M(SIGUSR2) | \ + M(SIGPIPE) | M(SIGALRM) | M(SIGTERM) | M(SIGXCPU) | \ + M(SIGXFSZ) | M(SIGVTALRM) | M(SIGPROF) | M(SIGPOLL) | \ + M(SIGSYS) | M(SIGSTKFLT) | M(SIGPWR) | M(SIGCONT) | \ + M(SIGSTOP) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) ) + +#define SIG_KERNEL_ONLY_MASK (\ + M(SIGKILL) | M(SIGSTOP) ) + +#define SIG_KERNEL_COREDUMP_MASK (\ + M(SIGQUIT) | M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | \ + M(SIGFPE) | M(SIGSEGV) | M(SIGBUS) | M(SIGSYS) | \ + M(SIGXCPU) | M(SIGXFSZ) ) + +#define T(sig, mask) \ + ((1UL << (sig)) & mask) + +#define sig_user_specific(sig) T(sig, SIG_USER_SPECIFIC_MASK) +#define sig_user_load_balance(sig) \ + (T(sig, 
SIG_USER_LOAD_BALANCE_MASK) || ((sig) >= SIGRTMIN)) +#define sig_kernel_specific(sig) T(sig, SIG_KERNEL_SPECIFIC_MASK) +#define sig_kernel_broadcast(sig) \ + (T(sig, SIG_KERNEL_BROADCAST_MASK) || ((sig) >= SIGRTMIN)) +#define sig_kernel_only(sig) T(sig, SIG_KERNEL_ONLY_MASK) +#define sig_kernel_coredump(sig) T(sig, SIG_KERNEL_COREDUMP_MASK) + +#define sig_user_defined(t, sig) \ + (((t)->sig->action[(sig)-1].sa.sa_handler != SIG_DFL) && \ + ((t)->sig->action[(sig)-1].sa.sa_handler != SIG_IGN)) + +#define sig_ignored(t, sig) \ + (((sig) != SIGCHLD) && \ + ((t)->sig->action[(sig)-1].sa.sa_handler == SIG_IGN)) + void __init signals_init(void) { sigqueue_cachep = kmem_cache_create("sigqueue", sizeof(struct sigqueue), __alignof__(struct sigqueue), - SIG_SLAB_DEBUG, NULL, NULL); + 0, NULL, NULL); if (!sigqueue_cachep) panic("signals_init(): cannot create sigqueue SLAB cache"); } +#define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) + +void recalc_sigpending_tsk(struct task_struct *t) +{ + if (PENDING(&t->pending, &t->blocked) || + PENDING(&t->sig->shared_pending, &t->blocked)) + set_tsk_thread_flag(t, TIF_SIGPENDING); + else + clear_tsk_thread_flag(t, TIF_SIGPENDING); +} + +void recalc_sigpending(void) +{ + if (PENDING(¤t->pending, ¤t->blocked) || + PENDING(¤t->sig->shared_pending, ¤t->blocked)) + set_thread_flag(TIF_SIGPENDING); + else + clear_thread_flag(TIF_SIGPENDING); +} /* Given the mask, find the first available signal that should be serviced. 
*/ static int -next_signal(struct task_struct *tsk, sigset_t *mask) +next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; - s = tsk->pending.signal.sig; + s = pending->signal.sig; m = mask->sig; switch (_NSIG_WORDS) { default: @@ -113,15 +221,36 @@ flush_signals(struct task_struct *t) flush_sigqueue(&t->pending); } +void remove_thread_group(struct task_struct *tsk, struct signal_struct *sig) +{ + write_lock_irq(&tasklist_lock); + spin_lock(&tsk->sig->siglock); + + if (tsk == sig->curr_target) + sig->curr_target = next_thread(tsk); + list_del_init(&tsk->thread_group); + + spin_unlock(&tsk->sig->siglock); + write_unlock_irq(&tasklist_lock); +} + void exit_sighand(struct task_struct *tsk) { struct signal_struct * sig = tsk->sig; + if (!sig) + BUG(); + if (!atomic_read(&sig->count)) + BUG(); + remove_thread_group(tsk, sig); + spin_lock_irq(&tsk->sigmask_lock); if (sig) { tsk->sig = NULL; - if (atomic_dec_and_test(&sig->count)) + if (atomic_dec_and_test(&sig->count)) { + flush_sigqueue(&sig->shared_pending); kmem_cache_free(sigact_cachep, sig); + } } clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); @@ -153,24 +282,10 @@ flush_signal_handlers(struct task_struct *t) void sig_exit(int sig, int exit_code, struct siginfo *info) { - struct task_struct *t; - sigaddset(¤t->pending.signal, sig); recalc_sigpending(); current->flags |= PF_SIGNALED; - /* Propagate the signal to all the tasks in - * our thread group - */ - if (info && (unsigned long)info != 1 - && info->si_code != SI_TKILL) { - read_lock(&tasklist_lock); - for_each_thread(t) { - force_sig_info(sig, info, t); - } - read_unlock(&tasklist_lock); - } - do_exit(exit_code); /* NOTREACHED */ } @@ -209,7 +324,7 @@ unblock_all_signals(void) spin_unlock_irqrestore(¤t->sigmask_lock, flags); } -static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) +static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info) { if 
(sigismember(&list->signal, sig)) { /* Collect the siginfo appropriate to this signal. */ @@ -221,9 +336,10 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) pp = &q->next; } - /* Ok, it wasn't in the queue. We must have - been out of queue space. So zero out the - info. */ + /* Ok, it wasn't in the queue. This must be + a fast-pathed signal or we must have been + out of queue space. So zero out the info. + */ sigdelset(&list->signal, sig); info->si_signo = sig; info->si_errno = 0; @@ -261,20 +377,14 @@ found_another: * Dequeue a signal and return the element to the caller, which is * expected to free it. * - * All callers must be holding current->sigmask_lock. + * All callers have to hold the siglock and the sigmask_lock. */ -int -dequeue_signal(sigset_t *mask, siginfo_t *info) +int dequeue_signal(struct sigpending *pending, sigset_t *mask, siginfo_t *info) { int sig = 0; -#if DEBUG_SIG -printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid, - signal_pending(current)); -#endif - - sig = next_signal(current, mask); + sig = next_signal(pending, mask); if (sig) { if (current->notifier) { if (sigismember(current->notifier_mask, sig)) { @@ -285,7 +395,7 @@ printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid, } } - if (!collect_signal(sig, ¤t->pending, info)) + if (!collect_signal(sig, pending, info)) sig = 0; /* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER, @@ -293,10 +403,6 @@ printk(KERN_DEBUG "SIG dequeue (%s:%d): %d ", current->comm, current->pid, } recalc_sigpending(); -#if DEBUG_SIG -printk(KERN_DEBUG " %d -> %d\n", signal_pending(current), sig); -#endif - return sig; } @@ -338,9 +444,10 @@ static int rm_sig_from_queue(int sig, struct task_struct *t) /* * Bad permissions for sending the signal */ -int bad_signal(int sig, struct siginfo *info, struct task_struct *t) +static inline int bad_signal(int sig, struct siginfo *info, struct task_struct *t) { - return (!info || ((unsigned 
long)info != 1 && SI_FROMUSER(info))) + return (!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))) && ((sig != SIGCONT) || (current->session != t->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) @@ -436,6 +543,13 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals { struct sigqueue * q = NULL; + /* + * fast-pathed signals for kernel-internal things like SIGSTOP + * or SIGKILL. + */ + if ((unsigned long)info == 2) + goto out_set; + /* Real-time signals must be queued if sent by sigqueue, or some other real-time mechanism. It is implementation defined whether kill() does so. We attempt to do so, on @@ -444,9 +558,8 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals make sure at least one signal gets delivered and don't pass on the info struct. */ - if (atomic_read(&nr_queued_signals) < max_queued_signals) { + if (atomic_read(&nr_queued_signals) < max_queued_signals) q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); - } if (q) { atomic_inc(&nr_queued_signals); @@ -473,14 +586,14 @@ static int send_signal(int sig, struct siginfo *info, struct sigpending *signals break; } } else if (sig >= SIGRTMIN && info && (unsigned long)info != 1 - && info->si_code != SI_USER) { + && info->si_code != SI_USER) /* * Queue overflow, abort. We may abort if the signal was rt * and sent by user using something other than kill(). 
*/ return -EAGAIN; - } +out_set: sigaddset(&signals->signal, sig); return 0; } @@ -528,17 +641,17 @@ static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t) return retval; } -int -send_sig_info(int sig, struct siginfo *info, struct task_struct *t) +static int +__send_sig_info(int sig, struct siginfo *info, struct task_struct *t, int shared) { - unsigned long flags; int ret; - -#if DEBUG_SIG -printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig); + if (!irqs_disabled()) + BUG(); +#if CONFIG_SMP + if (!spin_is_locked(&t->sig->siglock)) + BUG(); #endif - ret = -EINVAL; if (sig < 0 || sig > _NSIG) goto out_nolock; @@ -556,7 +669,7 @@ printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig); if (!sig || !t->sig) goto out_nolock; - spin_lock_irqsave(&t->sigmask_lock, flags); + spin_lock(&t->sigmask_lock); handle_stop_signal(sig, t); /* Optimize away the signal, if it's a signal that can be @@ -566,20 +679,25 @@ printk(KERN_DEBUG "SIG queue (%s:%d): %d ", t->comm, t->pid, sig); if (ignored_signal(sig, t)) goto out; - /* Support queueing exactly one non-rt signal, so that we - can get more detailed information about the cause of - the signal. */ - if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig)) - goto out; +#define LEGACY_QUEUE(sigptr, sig) \ + (((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig))) + + if (!shared) { + /* Support queueing exactly one non-rt signal, so that we + can get more detailed information about the cause of + the signal. 
*/ + if (LEGACY_QUEUE(&t->pending, sig)) + goto out; - ret = deliver_signal(sig, info, t); + ret = deliver_signal(sig, info, t); + } else { + if (LEGACY_QUEUE(&t->sig->shared_pending, sig)) + goto out; + ret = send_signal(sig, info, &t->sig->shared_pending); + } out: - spin_unlock_irqrestore(&t->sigmask_lock, flags); + spin_unlock(&t->sigmask_lock); out_nolock: -#if DEBUG_SIG -printk(KERN_DEBUG " %d -> %d\n", signal_pending(t), ret); -#endif - return ret; } @@ -605,7 +723,157 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) recalc_sigpending_tsk(t); spin_unlock_irqrestore(&t->sigmask_lock, flags); - return send_sig_info(sig, info, t); + return send_sig_info(sig, (void *)1, t); +} + +static int +__force_sig_info(int sig, struct task_struct *t) +{ + unsigned long int flags; + + spin_lock_irqsave(&t->sigmask_lock, flags); + if (t->sig == NULL) { + spin_unlock_irqrestore(&t->sigmask_lock, flags); + return -ESRCH; + } + + if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) + t->sig->action[sig-1].sa.sa_handler = SIG_DFL; + sigdelset(&t->blocked, sig); + recalc_sigpending_tsk(t); + spin_unlock_irqrestore(&t->sigmask_lock, flags); + + return __send_sig_info(sig, (void *)2, t, 0); +} + +#define can_take_signal(p, sig) \ + (((unsigned long) p->sig->action[sig-1].sa.sa_handler > 1) && \ + !sigismember(&p->blocked, sig) && (task_curr(p) || !signal_pending(p))) + +static inline +int load_balance_thread_group(struct task_struct *p, int sig, + struct siginfo *info) +{ + struct task_struct *tmp; + int ret; + + /* + * if the specified thread is not blocking this signal + * then deliver it. + */ + if (can_take_signal(p, sig)) + return __send_sig_info(sig, info, p, 0); + + /* + * Otherwise try to find a suitable thread. + * If no such thread is found then deliver to + * the original thread. 
+ */ + + tmp = p->sig->curr_target; + + if (!tmp || tmp->tgid != p->tgid) + /* restart balancing at this thread */ + p->sig->curr_target = p; + + else for (;;) { + if (list_empty(&p->thread_group)) + BUG(); + if (!tmp || tmp->tgid != p->tgid) + BUG(); + + /* + * Do not send signals that are ignored or blocked, + * or to not-running threads that are overworked: + */ + if (!can_take_signal(tmp, sig)) { + tmp = next_thread(tmp); + p->sig->curr_target = tmp; + if (tmp == p) + break; + continue; + } + ret = __send_sig_info(sig, info, tmp, 0); + return ret; + } + /* + * No suitable thread was found - put the signal + * into the shared-pending queue. + */ + return __send_sig_info(sig, info, p, 1); +} + +int __broadcast_thread_group(struct task_struct *p, int sig) +{ + struct task_struct *tmp; + struct list_head *entry; + int err = 0; + + /* send a signal to the head of the list */ + err = __force_sig_info(sig, p); + + /* send a signal to all members of the list */ + list_for_each(entry, &p->thread_group) { + tmp = list_entry(entry, task_t, thread_group); + err = __force_sig_info(sig, tmp); + } + return err; +} + +int +send_sig_info(int sig, struct siginfo *info, struct task_struct *p) +{ + unsigned long flags; + int ret = 0; + + if (!p) + BUG(); + if (!p->sig) + BUG(); + spin_lock_irqsave(&p->sig->siglock, flags); + + /* not a thread group - normal signal behavior */ + if (list_empty(&p->thread_group) || !sig) + goto out_send; + + if (sig_user_defined(p, sig)) { + if (sig_user_specific(sig)) + goto out_send; + if (sig_user_load_balance(sig)) { + ret = load_balance_thread_group(p, sig, info); + goto out_unlock; + } + + /* must not happen */ + BUG(); + } + /* optimize away ignored signals: */ + if (sig_ignored(p, sig)) + goto out_unlock; + + /* blocked (or ptraced) signals get posted */ + spin_lock(&p->sigmask_lock); + if ((p->ptrace & PT_PTRACED) || sigismember(&p->blocked, sig) || + sigismember(&p->real_blocked, sig)) { + spin_unlock(&p->sigmask_lock); + goto out_send; + 
} + spin_unlock(&p->sigmask_lock); + + if (sig_kernel_broadcast(sig) || sig_kernel_coredump(sig)) { + ret = __broadcast_thread_group(p, sig); + goto out_unlock; + } + if (sig_kernel_specific(sig)) + goto out_send; + + /* must not happen */ + BUG(); +out_send: + ret = __send_sig_info(sig, info, p, 0); +out_unlock: + spin_unlock_irqrestore(&p->sig->siglock, flags); + return ret; } /* @@ -670,15 +938,8 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) read_lock(&tasklist_lock); p = find_task_by_pid(pid); error = -ESRCH; - if (p) { - if (!thread_group_leader(p)) { - struct task_struct *tg; - tg = find_task_by_pid(p->tgid); - if (tg) - p = tg; - } + if (p) error = send_sig_info(sig, info, p); - } read_unlock(&tasklist_lock); return error; } @@ -755,25 +1016,36 @@ kill_proc(pid_t pid, int sig, int priv) * Joy. Or not. Pthread wants us to wake up every thread * in our parent group. */ -static void wake_up_parent(struct task_struct *parent) +static inline void wake_up_parent(struct task_struct *p) { - struct task_struct *tsk = parent; + struct task_struct *parent = p->parent, *tsk = parent; + /* + * Fortunately this is not necessary for thread groups: + */ + if (p->tgid == tsk->tgid) { + wake_up_interruptible(&tsk->wait_chldexit); + return; + } + spin_lock_irq(&parent->sig->siglock); do { wake_up_interruptible(&tsk->wait_chldexit); tsk = next_thread(tsk); + if (tsk->sig != parent->sig) + BUG(); } while (tsk != parent); + spin_unlock_irq(&parent->sig->siglock); } /* * Let a parent know about a status change of a child. */ + void do_notify_parent(struct task_struct *tsk, int sig) { struct siginfo info; int why, status; - /* is the thread detached? 
*/ if (sig == -1) BUG(); @@ -812,7 +1084,7 @@ void do_notify_parent(struct task_struct *tsk, int sig) info.si_status = status; send_sig_info(sig, &info, tsk->parent); - wake_up_parent(tsk->parent); + wake_up_parent(tsk); } @@ -837,13 +1109,24 @@ notify_parent(struct task_struct *tsk, int sig) int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs) { + sigset_t *mask = ¤t->blocked; + for (;;) { - unsigned long signr; + unsigned long signr = 0; struct k_sigaction *ka; - spin_lock_irq(¤t->sigmask_lock); - signr = dequeue_signal(¤t->blocked, info); - spin_unlock_irq(¤t->sigmask_lock); + local_irq_disable(); + if (current->sig->shared_pending.head) { + spin_lock(¤t->sig->siglock); + signr = dequeue_signal(¤t->sig->shared_pending, mask, info); + spin_unlock(¤t->sig->siglock); + } + if (!signr) { + spin_lock(¤t->sigmask_lock); + signr = dequeue_signal(¤t->pending, mask, info); + spin_unlock(¤t->sigmask_lock); + } + local_irq_enable(); if (!signr) break; @@ -1137,8 +1420,11 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, return -EINVAL; } - spin_lock_irq(¤t->sigmask_lock); - sig = dequeue_signal(&these, &info); + spin_lock_irq(¤t->sig->siglock); + spin_lock(¤t->sigmask_lock); + sig = dequeue_signal(¤t->sig->shared_pending, &these, &info); + if (!sig) + sig = dequeue_signal(¤t->pending, &these, &info); if (!sig) { timeout = MAX_SCHEDULE_TIMEOUT; if (uts) @@ -1149,21 +1435,27 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, /* None ready -- temporarily unblock those we're * interested while we are sleeping in so that we'll * be awakened when they arrive. 
*/ - sigset_t oldblocked = current->blocked; + current->real_blocked = current->blocked; sigandsets(&current->blocked, &current->blocked, &these); recalc_sigpending(); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock(&current->sigmask_lock); + spin_unlock_irq(&current->sig->siglock); current->state = TASK_INTERRUPTIBLE; timeout = schedule_timeout(timeout); - spin_lock_irq(&current->sigmask_lock); - sig = dequeue_signal(&these, &info); - current->blocked = oldblocked; + spin_lock_irq(&current->sig->siglock); + spin_lock(&current->sigmask_lock); + sig = dequeue_signal(&current->sig->shared_pending, &these, &info); + if (!sig) + sig = dequeue_signal(&current->pending, &these, &info); + current->blocked = current->real_blocked; + siginitset(&current->real_blocked, 0); recalc_sigpending(); } } - spin_unlock_irq(&current->sigmask_lock); + spin_unlock(&current->sigmask_lock); + spin_unlock_irq(&current->sig->siglock); if (sig) { ret = sig; @@ -1195,33 +1487,35 @@ sys_kill(int pid, int sig) } /* - * Kill only one task, even if it's a CLONE_THREAD task. + * Send a signal to only one task, even if it's a CLONE_THREAD task.
*/ asmlinkage long sys_tkill(int pid, int sig) { - struct siginfo info; - int error; - struct task_struct *p; - - /* This is only valid for single tasks */ - if (pid <= 0) - return -EINVAL; - - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = current->pid; - info.si_uid = current->uid; - - read_lock(&tasklist_lock); - p = find_task_by_pid(pid); - error = -ESRCH; - if (p) { - error = send_sig_info(sig, &info, p); - } - read_unlock(&tasklist_lock); - return error; + struct siginfo info; + int error; + struct task_struct *p; + + /* This is only valid for single tasks */ + if (pid <= 0) + return -EINVAL; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = current->pid; + info.si_uid = current->uid; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + error = -ESRCH; + if (p) { + spin_lock_irq(&p->sig->siglock); + error = __send_sig_info(sig, &info, p, 0); + spin_unlock_irq(&p->sig->siglock); + } + read_unlock(&tasklist_lock); + return error; } asmlinkage long @@ -1247,13 +1541,12 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; - if (sig < 1 || sig > _NSIG || - (act && (sig == SIGKILL || sig == SIGSTOP))) + if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig))) return -EINVAL; k = &current->sig->action[sig-1]; - spin_lock(&current->sig->siglock); + spin_lock_irq(&current->sig->siglock); if (oact) *oact = *k; @@ -1292,7 +1585,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) } } - spin_unlock(&current->sig->siglock); + spin_unlock_irq(&current->sig->siglock); return 0; } -- cgit v1.2.3 From 9fdbd959bfec877913d4eab793e813cc7d447b3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 7 Sep 2002 22:21:48 -0700 Subject: [PATCH] Back out the initial work for atomic copy_*_user() Back out the use of preempt_count to signify atomicity wrt pagefaults. We won't do it that way - in_atomic() works fine.
--- include/linux/preempt.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b4ff1a7c881c..1b227b3c8ccb 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -48,9 +48,6 @@ do { \ preempt_check_resched(); \ } while (0) -#define inc_preempt_count_non_preempt() do { } while (0) -#define dec_preempt_count_non_preempt() do { } while (0) - #else #define preempt_disable() do { } while (0) @@ -58,13 +55,6 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) -/* - * Sometimes we want to increment the preempt count, but we know that it's - * already incremented if the kernel is compiled for preemptibility. - */ -#define inc_preempt_count_non_preempt() inc_preempt_count() -#define dec_preempt_count_non_preempt() dec_preempt_count() - #endif #endif /* __LINUX_PREEMPT_H */ -- cgit v1.2.3 From 4b19c9405c4bf7e26cd173bd4cae93b1485bfacb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 7 Sep 2002 22:22:03 -0700 Subject: [PATCH] atomic copy_*_user infrastructure The patch implements the atomic copy_*_user() function. If the kernel takes a pagefault while running copy_*_user() in an atomic region, the copy_*_user() will fail (return a short value). And with this patch, holding an atomic kmap() puts the CPU into an atomic region. - Increment preempt_count() in kmap_atomic() regardless of the setting of CONFIG_PREEMPT. The pagefault handler recognises this as an atomic region and refuses to service the fault. copy_*_user will return a non-zero value. - Attempts to propagate the in_atomic() predicate to all the other highmem-capable architectures' pagefault handlers. But the code is only tested on x86. - Fixed a PPC bug in kunmap_atomic(): it forgot to reenable preemption if HIGHMEM_DEBUG is turned on. - Fixed a sparc bug in kunmap_atomic(): it forgot to reenable preemption all the time, for non-fixmap pages. 
- Fix an error in <linux/highmem.h> - in the CONFIG_HIGHMEM=n case, kunmap_atomic() takes an address, not a page *. --- arch/ppc/mm/fault.c | 2 +- arch/sparc/mm/fault.c | 2 +- include/asm-i386/highmem.h | 6 +++--- include/asm-ppc/hardirq.h | 2 ++ include/asm-ppc/highmem.h | 6 +++++- include/asm-sparc/hardirq.h | 6 ++++++ include/asm-sparc/highmem.h | 6 +++++- include/linux/highmem.h | 4 ++-- 8 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c index 1d7c7eb5dcf4..65ed61f1344c 100644 --- a/arch/ppc/mm/fault.c +++ b/arch/ppc/mm/fault.c @@ -102,7 +102,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, #endif /* !CONFIG_4xx */ #endif /* CONFIG_XMON || CONFIG_KGDB */ - if (in_interrupt() || mm == NULL) { + if (in_atomic() || mm == NULL) { bad_page_fault(regs, address, SIGSEGV); return; } diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c index 49eccf61be1d..e07d4d1017a9 100644 --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c @@ -233,7 +233,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault..
*/ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; down_read(&mm->mmap_sem); diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h index 1cba7fc45882..0316b53f868f 100644 --- a/include/asm-i386/highmem.h +++ b/include/asm-i386/highmem.h @@ -81,7 +81,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - preempt_disable(); + inc_preempt_count(); if (page < highmem_start_page) return page_address(page); @@ -104,7 +104,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < FIXADDR_START) { // FIXME - preempt_enable(); + dec_preempt_count(); return; } @@ -119,7 +119,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) __flush_tlb_one(vaddr); #endif - preempt_enable(); + dec_preempt_count(); } #endif /* __KERNEL__ */ diff --git a/include/asm-ppc/hardirq.h b/include/asm-ppc/hardirq.h index d56152a03ccc..547f2491000f 100644 --- a/include/asm-ppc/hardirq.h +++ b/include/asm-ppc/hardirq.h @@ -85,8 +85,10 @@ typedef struct { #define irq_enter() (preempt_count() += HARDIRQ_OFFSET) #if CONFIG_PREEMPT +# define in_atomic() (preempt_count() != kernel_locked()) # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1) #else +# define in_atomic() (preempt_count() != 0) # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET #endif #define irq_exit() \ diff --git a/include/asm-ppc/highmem.h b/include/asm-ppc/highmem.h index 5a630083d014..472482ca3f36 100644 --- a/include/asm-ppc/highmem.h +++ b/include/asm-ppc/highmem.h @@ -88,6 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) unsigned int idx; unsigned long vaddr; + inc_preempt_count(); if (page < highmem_start_page) return page_address(page); @@ -109,8 +110,10 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; unsigned int idx = type + 
KM_TYPE_NR*smp_processor_id(); - if (vaddr < KMAP_FIX_BEGIN) // FIXME + if (vaddr < KMAP_FIX_BEGIN) { // FIXME + dec_preempt_count(); return; + } if (vaddr != KMAP_FIX_BEGIN + idx * PAGE_SIZE) BUG(); @@ -122,6 +125,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) pte_clear(kmap_pte+idx); flush_tlb_page(0, vaddr); #endif + dec_preempt_count(); } #endif /* __KERNEL__ */ diff --git a/include/asm-sparc/hardirq.h b/include/asm-sparc/hardirq.h index a80212dc3a2a..f77ee7e415cf 100644 --- a/include/asm-sparc/hardirq.h +++ b/include/asm-sparc/hardirq.h @@ -113,6 +113,12 @@ do { \ #define irq_exit() br_read_unlock(BR_GLOBALIRQ_LOCK) #endif +#if CONFIG_PREEMPT +# define in_atomic() (preempt_count() != kernel_locked()) +#else +# define in_atomic() (preempt_count() != 0) +#endif + #ifndef CONFIG_SMP #define synchronize_irq() barrier() diff --git a/include/asm-sparc/highmem.h b/include/asm-sparc/highmem.h index bb2fc2331b5b..2ba438ea6111 100644 --- a/include/asm-sparc/highmem.h +++ b/include/asm-sparc/highmem.h @@ -83,6 +83,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) unsigned long idx; unsigned long vaddr; + inc_preempt_count(); if (page < highmem_start_page) return page_address(page); @@ -116,8 +117,10 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr; unsigned long idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIX_KMAP_BEGIN) // FIXME + if (vaddr < FIX_KMAP_BEGIN) { // FIXME + dec_preempt_count(); return; + } if (vaddr != FIX_KMAP_BEGIN + idx * PAGE_SIZE) BUG(); @@ -142,6 +145,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) flush_tlb_all(); #endif #endif + dec_preempt_count(); } #endif /* __KERNEL__ */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b389a75be5d7..370177037315 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -24,8 +24,8 @@ static inline void *kmap(struct page *page) { 
return page_address(page); } #define kunmap(page) do { (void) (page); } while (0) -#define kmap_atomic(page,idx) kmap(page) -#define kunmap_atomic(page,idx) kunmap(page) +#define kmap_atomic(page, idx) page_address(page) +#define kunmap_atomic(addr, idx) do { } while (0) #endif /* CONFIG_HIGHMEM */ -- cgit v1.2.3 From 49ba178cfe9cac97498bf83c25d34317cf207afb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 8 Sep 2002 06:04:56 -0700 Subject: [PATCH] Re: pinpointed: PANIC caused by dequeue_signal() in current Linus This fixes the bootup crash. There were two initialization bugs: - INIT_SIGNAL needs to set shared_pending. - exec() needs to set up newsig properly. the second one caused the crash Anton saw. --- arch/i386/kernel/init_task.c | 2 +- fs/exec.c | 2 ++ include/linux/init_task.h | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index f652f279b787..4eb40a9582c7 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -10,7 +10,7 @@ static struct fs_struct init_fs = INIT_FS; static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); struct mm_struct init_mm = INIT_MM(init_mm); /* diff --git a/fs/exec.c b/fs/exec.c index 9c00b733fcee..9344e1ec3894 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -514,6 +514,8 @@ static inline int make_private_signals(void) spin_lock_init(&newsig->siglock); atomic_set(&newsig->count, 1); memcpy(newsig->action, current->sig->action, sizeof(newsig->action)); + init_sigpending(&newsig->shared_pending); + spin_lock_irq(&current->sigmask_lock); current->sig = newsig; spin_unlock_irq(&current->sigmask_lock); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 80a57914bccc..bdf03241a009 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,10 +29,11 @@
.mmlist = LIST_HEAD_INIT(name.mmlist), \ } -#define INIT_SIGNALS { \ +#define INIT_SIGNALS(sig) { \ .count = ATOMIC_INIT(1), \ .action = { {{0,}}, }, \ - .siglock = SPIN_LOCK_UNLOCKED \ + .siglock = SPIN_LOCK_UNLOCKED, \ + .shared_pending = { NULL, &sig.shared_pending.head, {{0}}}, \ } /* -- cgit v1.2.3