| author | Linus Torvalds <torvalds@athlon.transmeta.com> | 2002-02-04 20:18:55 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@athlon.transmeta.com> | 2002-02-04 20:18:55 -0800 |
| commit | a880f45a48be2956d2c78a839c472287d54435c1 | |
| tree | bdcd6f6e8352cc495771e61e00cb729cc7215f65 /include/linux | |
| parent | c37fa164f793735b32aa3f53154ff1a7659e6442 | |
v2.4.9.10 -> v2.4.9.11
- Neil Brown: md cleanups/fixes
- Andrew Morton: console locking merge
- Andrea Arcangeli: major VM merge
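
The console locking merge removes the exported `console_lock` spinlock in favor of a sleeping lock, exposed through the new `acquire_console_sem()`/`release_console_sem()` pair declared in the `console.h` hunk below. A minimal sketch of a caller under the new API — `my_redraw_screen()` and its body are hypothetical, and since the lock can sleep it must only be taken from process context:

```c
#include <linux/console.h>

/* Hypothetical helper: serialize access to console/VT state against
 * printk output and other console users.  acquire_console_sem() can
 * sleep, so this is only valid in process context. */
static void my_redraw_screen(void)
{
        acquire_console_sem();
        /* ... safely touch console driver state here ... */
        release_console_sem();
}
```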
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/blkdev.h | 11 |
| -rw-r--r-- | include/linux/cache.h | 9 |
| -rw-r--r-- | include/linux/console.h | 5 |
| -rw-r--r-- | include/linux/ext2_fs_i.h | 1 |
| -rw-r--r-- | include/linux/fs.h | 79 |
| -rw-r--r-- | include/linux/highmem.h | 2 |
| -rw-r--r-- | include/linux/irq.h | 13 |
| -rw-r--r-- | include/linux/kbd_kern.h | 20 |
| -rw-r--r-- | include/linux/kernel.h | 3 |
| -rw-r--r-- | include/linux/list.h | 8 |
| -rw-r--r-- | include/linux/loop.h | 1 |
| -rw-r--r-- | include/linux/lvm.h | 5 |
| -rw-r--r-- | include/linux/mm.h | 81 |
| -rw-r--r-- | include/linux/mmzone.h | 9 |
| -rw-r--r-- | include/linux/pagemap.h | 1 |
| -rw-r--r-- | include/linux/rbtree.h | 133 |
| -rw-r--r-- | include/linux/sched.h | 14 |
| -rw-r--r-- | include/linux/slab.h | 2 |
| -rw-r--r-- | include/linux/swap.h | 157 |
| -rw-r--r-- | include/linux/swapctl.h | 22 |
| -rw-r--r-- | include/linux/timer.h | 1 |
21 files changed, 357 insertions, 220 deletions
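
The headline structural change of the VM merge is the switch from per-mm AVL trees to red-black trees for VMA tracking (`rb_node_t vm_rb` in `mm.h`, `rb_root_t mm_rb` in `sched.h`, and the new `include/linux/rbtree.h` in the diff below). A sketch of an address lookup over the new tree, modeled on the usage example embedded in `rbtree.h` itself — `my_find_vma()` is illustrative, and the caller is assumed to hold `mmap_sem`:

```c
#include <linux/sched.h>        /* struct mm_struct */
#include <linux/mm.h>           /* struct vm_area_struct */
#include <linux/rbtree.h>

/* Sketch of an address->VMA lookup over the new mm->mm_rb tree,
 * modeled on find_vma(): return the first VMA whose vm_end lies
 * above addr, or NULL if none does. */
static struct vm_area_struct *my_find_vma(struct mm_struct *mm,
                                          unsigned long addr)
{
        rb_node_t *n = mm->mm_rb.rb_node;
        struct vm_area_struct *vma = NULL;

        while (n) {
                struct vm_area_struct *tmp =
                        rb_entry(n, struct vm_area_struct, vm_rb);

                if (tmp->vm_end > addr) {
                        vma = tmp;              /* candidate: ends above addr */
                        if (tmp->vm_start <= addr)
                                break;          /* addr falls inside this VMA */
                        n = n->rb_left;
                } else
                        n = n->rb_right;
        }
        return vma;
}
```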
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86ea92ae5a94..f266229c340c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -203,4 +203,15 @@ static inline int get_hardsect_size(kdev_t dev) #define blk_finished_io(nsects) do { } while (0) #define blk_started_io(nsects) do { } while (0) +static inline int buffered_blk_size(kdev_t dev) +{ + int ret = INT_MAX; + int major = MAJOR(dev); + + if (blk_size[major]) + ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS); + + return ret; +} + #endif diff --git a/include/linux/cache.h b/include/linux/cache.h index 2030eb72bd36..086accecfce4 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -1,6 +1,7 @@ #ifndef __LINUX_CACHE_H #define __LINUX_CACHE_H +#include <linux/config.h> #include <asm/cache.h> #ifndef L1_CACHE_ALIGN @@ -15,6 +16,14 @@ #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + #ifndef __cacheline_aligned #ifdef MODULE #define __cacheline_aligned ____cacheline_aligned diff --git a/include/linux/console.h b/include/linux/console.h index 288f83b96f99..d2ae967af36c 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -91,8 +91,6 @@ extern struct console_cmdline console_list[MAX_CMDLINECONSOLES]; #define CON_CONSDEV (2) /* Last on the command line */ #define CON_ENABLED (4) -extern spinlock_t console_lock; - struct console { char name[8]; @@ -111,6 +109,9 @@ struct console extern void register_console(struct console *); extern int unregister_console(struct console *); extern struct console *console_drivers; +extern void acquire_console_sem(void); +extern void release_console_sem(void); +extern void console_conditional_schedule(void); /* VESA Blanking Levels */ #define VESA_NO_BLANKING 0 diff --git a/include/linux/ext2_fs_i.h b/include/linux/ext2_fs_i.h index 3f027f9d7ac8..7f02e7537ba3 100644 --- a/include/linux/ext2_fs_i.h +++ b/include/linux/ext2_fs_i.h @@ -34,6 +34,7 @@ struct ext2_inode_info { __u32 i_next_alloc_goal; __u32 i_prealloc_block; __u32 i_prealloc_count; + __u32 i_dir_start_lookup; int i_new_inode:1; /* Is a freshly allocated inode */ }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 549459db59f9..31a2167afac8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -46,6 +46,10 @@ struct poll_table_struct; #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* buffer header fixed size for the blkdev I/O through pagecache */ +#define BUFFERED_BLOCKSIZE_BITS 10 +#define BUFFERED_BLOCKSIZE (1 << BUFFERED_BLOCKSIZE_BITS) + /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { int nr_files; /* read only */ @@ -211,7 +215,8 @@ enum bh_state_bits { BH_Req, /* 0 if the buffer has been invalidated */ BH_Mapped, /* 1 if the buffer has a disk mapping */ BH_New, /* 1 if the buffer is new and not yet written out */ - BH_Protected, /* 1 if the buffer is protected */ + BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */ + BH_Wait_IO, /* 1 if we should throttle on this buffer */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -271,7 +276,7 @@ void init_buffer(struct buffer_head *, bh_end_io_t *, void *); #define buffer_req(bh) __buffer_state(bh,Req) #define buffer_mapped(bh) 
__buffer_state(bh,Mapped) #define buffer_new(bh) __buffer_state(bh,New) -#define buffer_protected(bh) __buffer_state(bh,Protected) +#define buffer_async(bh) __buffer_state(bh,Async) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) @@ -366,6 +371,7 @@ struct iattr { */ struct page; struct address_space; +struct kiobuf; struct address_space_operations { int (*writepage)(struct page *); @@ -375,6 +381,8 @@ struct address_space_operations { int (*commit_write)(struct file *, struct page *, unsigned, unsigned); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ int (*bmap)(struct address_space *, long); +#define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ + int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); }; struct address_space { @@ -401,9 +409,10 @@ struct char_device { struct block_device { struct list_head bd_hash; atomic_t bd_count; -/* struct address_space bd_data; */ + struct inode * bd_inode; dev_t bd_dev; /* not a kdev_t - it's a search key */ - atomic_t bd_openers; + int bd_openers; + int bd_cache_openers; const struct block_device_operations *bd_op; struct semaphore bd_sem; /* open/close mutex */ }; @@ -414,6 +423,7 @@ struct inode { struct list_head i_dentry; struct list_head i_dirty_buffers; + struct list_head i_dirty_data_buffers; unsigned long i_ino; atomic_t i_count; @@ -438,7 +448,8 @@ struct inode { wait_queue_head_t i_wait; struct file_lock *i_flock; struct address_space *i_mapping; - struct address_space i_data; + struct address_space i_data; + int i_mapping_overload; struct dquot *i_dquot[MAXQUOTAS]; /* These three should probably be a union */ struct pipe_inode_info *i_pipe; @@ -512,6 +523,10 @@ struct file { /* needed for tty driver, and maybe others */ void *private_data; + + /* preallocated helper kiobuf to speedup O_DIRECT */ + struct kiobuf *f_iobuf; + long f_iobuf_lock; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); @@ -1035,7 +1050,9 @@ extern void bdput(struct block_device *); extern struct char_device *cdget(dev_t); extern void cdput(struct char_device *); extern int blkdev_open(struct inode *, struct file *); +extern int blkdev_close(struct inode *, struct file *); extern struct file_operations def_blk_fops; +extern struct address_space_operations def_blk_aops; extern struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_get(struct block_device *, mode_t, unsigned, int); @@ -1074,8 +1091,7 @@ extern void set_buffer_async_io(struct buffer_head *bh) ; #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ -#define BUF_PROTECTED 3 /* Ramdisk persistent storage */ -#define NR_LIST 4 +#define NR_LIST 3 static inline void get_bh(struct buffer_head * bh) { @@ -1112,24 +1128,21 @@ static inline void mark_buffer_clean(struct buffer_head * bh) __mark_buffer_clean(bh); } -#define atomic_set_buffer_protected(bh) test_and_set_bit(BH_Protected, &(bh)->b_state) - -static inline void __mark_buffer_protected(struct buffer_head *bh) -{ - refile_buffer(bh); -} - -static inline void mark_buffer_protected(struct buffer_head * bh) -{ - if (!atomic_set_buffer_protected(bh)) - __mark_buffer_protected(bh); -} - +extern void FASTCALL(__mark_dirty(struct buffer_head *bh)); extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh)); extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); +extern void 
FASTCALL(buffer_insert_inode_data_queue(struct buffer_head *, struct inode *)); #define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state) +static inline void mark_buffer_async(struct buffer_head * bh, int on) +{ + if (on) + set_bit(BH_Async, &bh->b_state); + else + clear_bit(BH_Async, &bh->b_state); +} + /* * If an error happens during the make_request, this function * has to be recalled. It marks the buffer as clean and not @@ -1157,20 +1170,29 @@ extern int check_disk_change(kdev_t); extern int invalidate_inodes(struct super_block *); extern int invalidate_device(kdev_t, int); extern void invalidate_inode_pages(struct inode *); +extern void invalidate_inode_pages2(struct address_space *); extern void invalidate_inode_buffers(struct inode *); -#define invalidate_buffers(dev) __invalidate_buffers((dev), 0) -#define destroy_buffers(dev) __invalidate_buffers((dev), 1) -extern void __invalidate_buffers(kdev_t dev, int); +#define invalidate_buffers(dev) __invalidate_buffers((dev), 0, 0) +#define destroy_buffers(dev) __invalidate_buffers((dev), 1, 0) +#define update_buffers(dev) \ +do { \ + __invalidate_buffers((dev), 0, 1); \ + __invalidate_buffers((dev), 0, 2); \ +} while (0) +extern void __invalidate_buffers(kdev_t dev, int, int); extern void sync_inodes(kdev_t); extern void sync_unlocked_inodes(void); extern void write_inode_now(struct inode *, int); +extern int sync_buffers(kdev_t, int); extern void sync_dev(kdev_t); extern int fsync_dev(kdev_t); extern int fsync_super(struct super_block *); extern int fsync_no_super(kdev_t); extern void sync_inodes_sb(struct super_block *); -extern int fsync_inode_buffers(struct inode *); extern int osync_inode_buffers(struct inode *); +extern int osync_inode_data_buffers(struct inode *); +extern int fsync_inode_buffers(struct inode *); +extern int fsync_inode_data_buffers(struct inode *); extern int inode_has_buffers(struct inode *); extern void filemap_fdatasync(struct address_space *); extern void filemap_fdatawait(struct address_space *); @@ -1329,7 +1351,9 @@ extern int brw_page(int, struct page *, kdev_t, int [], int); typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); /* Generic buffer handling for block filesystems.. 
*/ -extern int block_flushpage(struct page *, unsigned long); +extern int discard_bh_page(struct page *, unsigned long, int); +#define block_flushpage(page, offset) discard_bh_page(page, offset, 1) +#define block_invalidate_page(page) discard_bh_page(page, 0, 0) extern int block_symlink(struct inode *, const char *, int); extern int block_write_full_page(struct page*, get_block_t*); extern int block_read_full_page(struct page*, get_block_t*); @@ -1341,6 +1365,8 @@ extern int block_sync_page(struct page *); int generic_block_bmap(struct address_space *, long, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); +extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); +extern void create_empty_buffers(struct page *, kdev_t, unsigned long); extern int waitfor_one_page(struct page*); extern int generic_file_mmap(struct file *, struct vm_area_struct *); @@ -1400,6 +1426,9 @@ extern ssize_t block_write(struct file *, const char *, size_t, loff_t *); extern int file_fsync(struct file *, struct dentry *, int); extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); extern int generic_osync_inode(struct inode *, int); +#define OSYNC_METADATA (1<<0) +#define OSYNC_DATA (1<<1) +#define OSYNC_INODE (1<<2) extern int inode_change_ok(struct inode *, struct iattr *); extern int inode_setattr(struct inode *, struct iattr *); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index abc5e29d95b9..1e2b8b1f4ae6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -11,7 +11,7 @@ extern struct page *highmem_start_page; #include <asm/highmem.h> /* declarations for linux/mm/highmem.c */ -FASTCALL(unsigned int nr_free_highpages(void)); +unsigned int nr_free_highpages(void); extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); diff --git a/include/linux/irq.h b/include/linux/irq.h index 3b6af49b3071..fca74da7d54b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -56,6 +56,19 @@ extern irq_desc_t irq_desc [NR_IRQS]; #include <asm/hw_irq.h> /* the arch dependent stuff */ +/** + * touch_nmi_watchdog - restart NMI watchdog timeout. + * + * If the architecture supports the NMI watchdog, touch_nmi_watchdog() + * may be used to reset the timeout - for code which intentionally + * disables interrupts for a long time. This call is stateless. 
+ */ +#ifdef ARCH_HAS_NMI_WATCHDOG +extern void touch_nmi_watchdog(void); +#else +# define touch_nmi_watchdog() do { } while(0) +#endif + extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); extern int setup_irq(unsigned int , struct irqaction * ); diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index cc442a6b865b..7d0395582eb7 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -69,23 +69,12 @@ extern int kbd_init(void); extern unsigned char getledstate(void); extern void setledstate(struct kbd_struct *kbd, unsigned int led); -extern struct tasklet_struct console_tasklet; - extern int do_poke_blanked_console; extern void (*kbd_ledfunc)(unsigned int led); -static inline void show_console(void) -{ - do_poke_blanked_console = 1; - tasklet_schedule(&console_tasklet); -} - -static inline void set_console(int nr) -{ - want_console = nr; - tasklet_schedule(&console_tasklet); -} +extern void set_console(int nr); +extern void schedule_console_callback(void); static inline void set_leds(void) { @@ -159,12 +148,9 @@ extern unsigned int keymap_count; /* console.c */ -extern task_queue con_task_queue; - static inline void con_schedule_flip(struct tty_struct *t) { - queue_task(&t->flip.tqueue, &con_task_queue); - tasklet_schedule(&console_tasklet); + schedule_task(&t->flip.tqueue); } #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4d0143310aca..1ee66f6e10c9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -93,6 +93,9 @@ static inline void console_verbose(void) console_loglevel = 15; } +extern void bust_spinlocks(int yes); +extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ + #if DEBUG #define pr_debug(fmt,arg...) \ printk(KERN_DEBUG fmt,##arg) diff --git a/include/linux/list.h b/include/linux/list.h index 48fea84b49ed..4a88363eeca4 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -3,6 +3,8 @@ #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE) +#include <linux/prefetch.h> + /* * Simple doubly linked list implementation. * @@ -90,6 +92,7 @@ static __inline__ void __list_del(struct list_head * prev, static __inline__ void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); + entry->next = entry->prev = 0; } /** @@ -147,8 +150,9 @@ static __inline__ void list_splice(struct list_head *list, struct list_head *hea * @head: the head for your list. 
*/ #define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - + for (pos = (head)->next, prefetch(pos->next); pos != (head); \ + pos = pos->next, prefetch(pos->next)) + #endif /* __KERNEL__ || _LVM_H_INCLUDE */ #endif diff --git a/include/linux/loop.h b/include/linux/loop.h index 37eefbb13f9b..11622345f2f9 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -77,6 +77,7 @@ static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf, */ #define LO_FLAGS_DO_BMAP 1 #define LO_FLAGS_READ_ONLY 2 +#define LO_FLAGS_BH_REMAP 4 /* * Note that this structure gets the wrong offsets when directly used diff --git a/include/linux/lvm.h b/include/linux/lvm.h index e4f6c1f44a3b..b1bc98c045a9 100644 --- a/include/linux/lvm.h +++ b/include/linux/lvm.h @@ -283,8 +283,9 @@ #define LVM_MAX_STRIPES 128 /* max # of stripes */ #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ #define LVM_MAX_MIRRORS 2 /* future use */ -#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */ -#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */ +#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ +#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */ +#define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ #define LVM_PARTITION 0xfe /* LVM partition id */ #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 949bb22391f7..1efd8c8b1fd2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/mmzone.h> #include <linux/swap.h> +#include <linux/rbtree.h> extern unsigned long max_mapnr; extern unsigned long num_physpages; @@ -18,7 +19,7 @@ extern void * high_memory; extern int page_cluster; /* The inactive_clean lists are per zone. */ extern struct list_head active_list; -extern struct list_head inactive_dirty_list; +extern struct list_head inactive_list; #include <asm/page.h> #include <asm/pgtable.h> @@ -50,10 +51,7 @@ struct vm_area_struct { pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, listed below. */ - /* AVL tree of VM areas per task, sorted by address */ - short vm_avl_height; - struct vm_area_struct * vm_avl_left; - struct vm_area_struct * vm_avl_right; + rb_node_t vm_rb; /* * For areas with an address space and backing store, @@ -156,7 +154,6 @@ typedef struct page { updated asynchronously */ struct list_head lru; /* Pageout list, eg. active_list; protected by pagemap_lru_lock !! */ - unsigned long age; /* Page aging counter. */ wait_queue_head_t wait; /* Page locked? Stand in line... */ struct page **pprev_hash; /* Complement to *next_hash. */ struct buffer_head * buffers; /* Buffer maps us to a disk block. */ @@ -275,16 +272,14 @@ typedef struct page { #define PG_dirty 4 #define PG_decr_after 5 #define PG_active 6 -#define PG_inactive_dirty 7 +#define PG_inactive 7 #define PG_slab 8 #define PG_swap_cache 9 #define PG_skip 10 -#define PG_inactive_clean 11 -#define PG_highmem 12 -#define PG_checked 13 /* kill me in 2.5.<early>. */ - /* bits 21-29 unused */ -#define PG_arch_1 30 -#define PG_reserved 31 +#define PG_highmem 11 +#define PG_checked 12 /* kill me in 2.5.<early>. */ +#define PG_arch_1 13 +#define PG_reserved 14 /* Make it prettier to test the above... 
*/ #define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) @@ -347,14 +342,14 @@ static inline void set_page_dirty(struct page * page) #define PageActive(page) test_bit(PG_active, &(page)->flags) #define SetPageActive(page) set_bit(PG_active, &(page)->flags) #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) +#define TestandSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags) +#define TestandClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags) -#define PageInactiveDirty(page) test_bit(PG_inactive_dirty, &(page)->flags) -#define SetPageInactiveDirty(page) set_bit(PG_inactive_dirty, &(page)->flags) -#define ClearPageInactiveDirty(page) clear_bit(PG_inactive_dirty, &(page)->flags) - -#define PageInactiveClean(page) test_bit(PG_inactive_clean, &(page)->flags) -#define SetPageInactiveClean(page) set_bit(PG_inactive_clean, &(page)->flags) -#define ClearPageInactiveClean(page) clear_bit(PG_inactive_clean, &(page)->flags) +#define PageInactive(page) test_bit(PG_inactive, &(page)->flags) +#define SetPageInactive(page) set_bit(PG_inactive, &(page)->flags) +#define ClearPageInactive(page) clear_bit(PG_inactive, &(page)->flags) +#define TestandSetPageInactive(page) test_and_set_bit(PG_inactive, &(page)->flags) +#define TestandClearPageInactive(page) test_and_clear_bit(PG_inactive, &(page)->flags) #ifdef CONFIG_HIGHMEM #define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) @@ -380,11 +375,11 @@ extern mem_map_t * mem_map; * can allocate highmem pages, the *get*page*() variants return * virtual kernel addresses to the allocated page(s). */ -extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned long order)); -extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned long order, zonelist_t *zonelist)); -extern struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order); +extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); +extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); +extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); -static inline struct page * alloc_pages(int gfp_mask, unsigned long order) +static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) { /* * Gets optimized away by the compiler. @@ -396,8 +391,8 @@ static inline struct page * alloc_pages(int gfp_mask, unsigned long order) #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long order)); -extern unsigned long FASTCALL(get_zeroed_page(int gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) @@ -413,8 +408,8 @@ extern unsigned long FASTCALL(get_zeroed_page(int gfp_mask)); /* * There is only one 'core' page-freeing function. 
*/ -extern void FASTCALL(__free_pages(struct page *page, unsigned long order)); -extern void FASTCALL(free_pages(unsigned long addr, unsigned long order)); +extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); +extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0) @@ -451,7 +446,7 @@ extern int ptrace_attach(struct task_struct *tsk); */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { - if (!pgd_present(*pgd)) + if (pgd_none(*pgd)) return __pmd_alloc(mm, pgd, address); return pmd_offset(pgd, address); } @@ -468,6 +463,11 @@ extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void swapin_readahead(swp_entry_t); +static inline int is_page_cache_freeable(struct page * page) +{ + return page_count(page) - !!page->buffers == 1; +} + /* * Work out if there are any other processes sharing this * swap cache page. Never mind the buffers. @@ -490,7 +490,7 @@ extern void lock_vma_mappings(struct vm_area_struct *); extern void unlock_vma_mappings(struct vm_area_struct *); extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void build_mmap_avl(struct mm_struct *); +extern void build_mmap_rb(struct mm_struct *); extern void exit_mmap(struct mm_struct *); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -516,6 +516,22 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t); extern unsigned long do_brk(unsigned long, unsigned long); +static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev) +{ + prev->vm_next = vma->vm_next; + rb_erase(&vma->vm_rb, &mm->mm_rb); + if (mm->mmap_cache == vma) + mm->mmap_cache = prev; +} + +static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags) +{ + if (!vma->vm_file && vma->vm_flags == vm_flags) + return 1; + else + return 0; +} + struct zone_t; /* filemap.c */ extern void remove_inode_page(struct page *); @@ -562,6 +578,11 @@ static inline int expand_stack(struct vm_area_struct * vma, unsigned long addres { unsigned long grow; + /* + * vma->vm_start/vm_end cannot change under us because the caller is required + * to hold the mmap_sem in write mode. We need to get the spinlock only + * before relocating the vma range ourself. 
+ */ address &= PAGE_MASK; grow = (vma->vm_start - address) >> PAGE_SHIFT; if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ab122514d508..ea14bd835c68 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,14 +39,12 @@ typedef struct zone_struct { */ spinlock_t lock; unsigned long free_pages; - unsigned long inactive_clean_pages; - unsigned long inactive_dirty_pages; unsigned long pages_min, pages_low, pages_high; + int need_balance; /* * free areas of different sizes */ - struct list_head inactive_clean_list; free_area_t free_area[MAX_ORDER]; /* @@ -101,6 +99,7 @@ struct bootmem_data; typedef struct pglist_data { zone_t node_zones[MAX_NR_ZONES]; zonelist_t node_zonelists[GFP_ZONEMASK+1]; + int nr_zones; struct page *node_mem_map; unsigned long *valid_addr_bitmap; struct bootmem_data *bdata; @@ -114,8 +113,8 @@ typedef struct pglist_data { extern int numnodes; extern pg_data_t *pgdat_list; -#define memclass(pgzone, tzone) (((pgzone)->zone_pgdat == (tzone)->zone_pgdat) \ - && ((pgzone) <= (tzone))) +#define memclass(pgzone, classzone) (((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \ + && ((pgzone) <= (classzone))) /* * The following two are not meant for general usage. They are here as diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fa422a86f02a..88366342a2c9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -29,7 +29,6 @@ #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) #define page_cache_get(x) get_page(x) -#define page_cache_free(x) __free_page(x) #define page_cache_release(x) __free_page(x) static inline struct page *page_cache_alloc(struct address_space *x) diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h new file mode 100644 index 000000000000..96f20e145be5 --- /dev/null +++ b/include/linux/rbtree.h @@ -0,0 +1,133 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + Some example of insert and search follows here. The search is a plain + normal search over an ordered tree. The insert instead must be implemented + int two steps: as first thing the code must insert the element in + order as a red leaf in the tree, then the support library function + rb_insert_color() must be called. Such function will do the + not trivial work to rebalance the rbtree if necessary. 
+ +----------------------------------------------------------------------- +static inline struct page * rb_search_page_cache(struct inode * inode, + unsigned long offset) +{ + rb_node_t * n = inode->i_rb_page_cache.rb_node; + struct page * page; + + while (n) + { + page = rb_entry(n, struct page, rb_page_cache); + + if (offset < page->offset) + n = n->rb_left; + else if (offset > page->offset) + n = n->rb_right; + else + return page; + } + return NULL; +} + +static inline struct page * __rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + rb_node_t ** p = &inode->i_rb_page_cache.rb_node; + rb_node_t * parent = NULL; + struct page * page; + + while (*p) + { + parent = *p; + page = rb_entry(parent, struct page, rb_page_cache); + + if (offset < page->offset) + p = &(*p)->rb_left; + else if (offset > page->offset) + p = &(*p)->rb_right; + else + return page; + } + + rb_link_node(node, parent, p); + + return NULL; +} + +static inline struct page * rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + struct page * ret; + if ((ret = __rb_insert_page_cache(inode, offset, node))) + goto out; + rb_insert_color(node, &inode->i_rb_page_cache); + out: + return ret; +} +----------------------------------------------------------------------- +*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include <linux/kernel.h> +#include <linux/stddef.h> + +typedef struct rb_node_s +{ + struct rb_node_s * rb_parent; + int rb_color; +#define RB_RED 0 +#define RB_BLACK 1 + struct rb_node_s * rb_right; + struct rb_node_s * rb_left; +} +rb_node_t; + +typedef struct rb_root_s +{ + struct rb_node_s * rb_node; +} +rb_root_t; + +#define RB_ROOT (rb_root_t) { NULL, } +#define rb_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +extern void rb_insert_color(rb_node_t *, rb_root_t *); +extern void rb_erase(rb_node_t *, rb_root_t *); + +static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) +{ + node->rb_parent = parent; + node->rb_color = RB_RED; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#endif /* _LINUX_RBTREE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 594acbc48a34..b964abb8c540 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -12,6 +12,7 @@ extern unsigned long event; #include <linux/types.h> #include <linux/times.h> #include <linux/timex.h> +#include <linux/rbtree.h> #include <asm/system.h> #include <asm/semaphore.h> @@ -200,12 +201,9 @@ struct files_struct { /* Maximum number of active map areas.. This is a random (large) number */ #define MAX_MAP_COUNT (65536) -/* Number of map areas at which the AVL tree is activated. This is arbitrary. */ -#define AVL_MIN_MAP_COUNT 32 - struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ - struct vm_area_struct * mmap_avl; /* tree of VMAs */ + rb_root_t mm_rb; struct vm_area_struct * mmap_cache; /* last find_vma result */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? 
*/ @@ -237,13 +235,10 @@ extern int mmlist_nr; #define INIT_MM(name) \ { \ - mmap: &init_mmap, \ - mmap_avl: NULL, \ - mmap_cache: NULL, \ + mm_rb: RB_ROOT, \ pgd: swapper_pg_dir, \ mm_users: ATOMIC_INIT(2), \ mm_count: ATOMIC_INIT(1), \ - map_count: 1, \ mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ page_table_lock: SPIN_LOCK_UNLOCKED, \ mmlist: LIST_HEAD_INIT(name.mmlist), \ @@ -320,6 +315,8 @@ struct task_struct { struct task_struct *next_task, *prev_task; struct mm_struct *active_mm; + struct list_head local_pages; + unsigned int allocation_order, nr_local_pages; /* task state */ struct linux_binfmt *binfmt; @@ -416,6 +413,7 @@ struct task_struct { #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_FREE_PAGES 0x00002000 /* per process page freeing */ #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ diff --git a/include/linux/slab.h b/include/linux/slab.h index d5ec05fef003..efa8638d612b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -60,7 +60,7 @@ extern void kmem_cache_free(kmem_cache_t *, void *); extern void *kmalloc(size_t, int); extern void kfree(const void *); -extern void kmem_cache_reap(int); +extern int FASTCALL(kmem_cache_reap(int)); extern int slabinfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data); extern int slabinfo_write_proc(struct file *file, const char *buffer, diff --git a/include/linux/swap.h b/include/linux/swap.h index d10b6277b2c3..a6d28bee7d71 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -8,7 +8,7 @@ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 -#define MAX_SWAPFILES 8 +#define MAX_SWAPFILES 32 /* * Magic header for a swap area. 
The first part of the union is @@ -80,10 +80,9 @@ struct swap_info_struct { extern int nr_swap_pages; extern unsigned int nr_free_pages(void); -extern unsigned int nr_inactive_clean_pages(void); extern unsigned int nr_free_buffer_pages(void); extern int nr_active_pages; -extern int nr_inactive_dirty_pages; +extern int nr_inactive_pages; extern atomic_t nr_async_pages; extern struct address_space swapper_space; extern atomic_t page_cache_size; @@ -99,26 +98,20 @@ struct sysinfo; struct zone_t; /* linux/mm/swap.c */ -extern int memory_pressure; -extern void deactivate_page(struct page *); -extern void deactivate_page_nolock(struct page *); -extern void activate_page(struct page *); -extern void activate_page_nolock(struct page *); -extern void lru_cache_add(struct page *); -extern void __lru_cache_del(struct page *); -extern void lru_cache_del(struct page *); -extern void recalculate_vm_stats(void); +extern void FASTCALL(lru_cache_add(struct page *)); +extern void FASTCALL(__lru_cache_del(struct page *)); +extern void FASTCALL(lru_cache_del(struct page *)); + +extern void FASTCALL(deactivate_page(struct page *)); +extern void FASTCALL(deactivate_page_nolock(struct page *)); +extern void FASTCALL(activate_page(struct page *)); +extern void FASTCALL(activate_page_nolock(struct page *)); + extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern struct page * reclaim_page(zone_t *); extern wait_queue_head_t kswapd_wait; -extern wait_queue_head_t kreclaimd_wait; -extern int page_launder(int, int); -extern int free_shortage(void); -extern int inactive_shortage(void); -extern void wakeup_kswapd(void); -extern int try_to_free_pages(unsigned int gfp_mask); +extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int)); /* linux/mm/page_io.c */ extern void rw_swap_page(int, struct page *); @@ -134,7 +127,6 @@ extern struct page * lookup_swap_cache(swp_entry_t); extern struct page * read_swap_cache_async(swp_entry_t); /* linux/mm/oom_kill.c */ -extern int out_of_memory(void); extern void oom_kill(void); /* @@ -146,7 +138,6 @@ extern void delete_from_swap_cache_nolock(struct page *page); extern void free_page_and_swap_cache(struct page *page); /* linux/mm/swapfile.c */ -extern int vm_swap_full(void); extern unsigned int nr_swapfiles; extern struct swap_info_struct swap_info[]; extern int is_swap_partition(kdev_t); @@ -179,90 +170,51 @@ extern unsigned long swap_cache_find_success; extern spinlock_t pagemap_lru_lock; -extern void FASTCALL(mark_page_accessed(struct page *)); - -/* - * Page aging defines. - * Since we do exponential decay of the page age, we - * can chose a fairly large maximum. - */ -#define PAGE_AGE_START 2 -#define PAGE_AGE_ADV 3 -#define PAGE_AGE_MAX 64 - /* * List add/del helper macros. These must be called * with the pagemap_lru_lock held! 
*/ -#define DEBUG_ADD_PAGE \ - if (PageActive(page) || PageInactiveDirty(page) || \ - PageInactiveClean(page)) BUG(); - -#define ZERO_PAGE_BUG \ - if (page_count(page) == 0) BUG(); - -#define add_page_to_active_list(page) { \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ - page->age = 0; \ - ClearPageReferenced(page); \ - SetPageActive(page); \ - list_add(&(page)->lru, &active_list); \ - nr_active_pages++; \ -} - -#define add_page_to_inactive_dirty_list(page) { \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ - SetPageInactiveDirty(page); \ - list_add(&(page)->lru, &inactive_dirty_list); \ - nr_inactive_dirty_pages++; \ - page->zone->inactive_dirty_pages++; \ -} - -#define add_page_to_inactive_clean_list(page) { \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ - SetPageInactiveClean(page); \ - list_add(&(page)->lru, &page->zone->inactive_clean_list); \ - page->zone->inactive_clean_pages++; \ -} - -#define del_page_from_active_list(page) { \ - list_del(&(page)->lru); \ - ClearPageActive(page); \ - nr_active_pages--; \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ -} - -#define del_page_from_inactive_dirty_list(page) { \ - list_del(&(page)->lru); \ - ClearPageInactiveDirty(page); \ - nr_inactive_dirty_pages--; \ - page->zone->inactive_dirty_pages--; \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ -} - -#define del_page_from_inactive_clean_list(page) { \ - list_del(&(page)->lru); \ - ClearPageInactiveClean(page); \ - page->zone->inactive_clean_pages--; \ - DEBUG_ADD_PAGE \ - ZERO_PAGE_BUG \ -} - -/* - * In mm/swap.c::recalculate_vm_stats(), we substract - * inactive_target from memory_pressure every second. - * This means that memory_pressure is smoothed over - * 64 (1 << INACTIVE_SHIFT) seconds. - */ -#define INACTIVE_SHIFT 6 -#define inactive_target min_t(unsigned long, \ - (memory_pressure >> INACTIVE_SHIFT), \ - (num_physpages / 4)) +#define DEBUG_LRU_PAGE(page) \ +do { \ + if (PageActive(page)) \ + BUG(); \ + if (PageInactive(page)) \ + BUG(); \ + if (page_count(page) == 0) \ + BUG(); \ +} while (0) + +#define add_page_to_active_list(page) \ +do { \ + DEBUG_LRU_PAGE(page); \ + SetPageActive(page); \ + list_add(&(page)->lru, &active_list); \ + nr_active_pages++; \ +} while (0) + +#define add_page_to_inactive_list(page) \ +do { \ + DEBUG_LRU_PAGE(page); \ + SetPageInactive(page); \ + list_add(&(page)->lru, &inactive_list); \ + nr_inactive_pages++; \ +} while (0) + +#define del_page_from_active_list(page) \ +do { \ + list_del(&(page)->lru); \ + ClearPageActive(page); \ + nr_active_pages--; \ + DEBUG_LRU_PAGE(page); \ +} while (0) + +#define del_page_from_inactive_list(page) \ +do { \ + list_del(&(page)->lru); \ + ClearPageInactive(page); \ + nr_inactive_pages--; \ + DEBUG_LRU_PAGE(page); \ +} while (0) /* * Ugly ugly ugly HACK to make sure the inactive lists @@ -278,9 +230,6 @@ extern void FASTCALL(mark_page_accessed(struct page *)); #include <linux/major.h> #endif -#define page_ramdisk(page) \ - (page->buffers && (MAJOR(page->buffers->b_dev) == RAMDISK_MAJOR)) - extern spinlock_t swaplock; #define swap_list_lock() spin_lock(&swaplock) diff --git a/include/linux/swapctl.h b/include/linux/swapctl.h index f9f2d2acbf91..de22bff4b788 100644 --- a/include/linux/swapctl.h +++ b/include/linux/swapctl.h @@ -1,28 +1,6 @@ #ifndef _LINUX_SWAPCTL_H #define _LINUX_SWAPCTL_H -#include <asm/page.h> -#include <linux/fs.h> - -typedef struct buffer_mem_v1 -{ - unsigned int min_percent; - unsigned int borrow_percent; - unsigned int max_percent; -} buffer_mem_v1; -typedef buffer_mem_v1 buffer_mem_t; -extern buffer_mem_t buffer_mem; -extern buffer_mem_t 
page_cache; - -typedef struct freepages_v1 -{ - unsigned int min; - unsigned int low; - unsigned int high; -} freepages_v1; -typedef freepages_v1 freepages_t; -extern freepages_t freepages; - typedef struct pager_daemon_v1 { unsigned int tries_base; diff --git a/include/linux/timer.h b/include/linux/timer.h index e3249bf933a0..803d268f12e3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -19,6 +19,7 @@ struct timer_list { unsigned long data; void (*function)(unsigned long); }; +typedef struct timer_list timer_t; extern void add_timer(struct timer_list * timer); extern int del_timer(struct timer_list * timer); |
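
One usage note on the `irq.h` hunk above: `touch_nmi_watchdog()` exists precisely for code that keeps interrupts disabled long enough to trip the NMI watchdog. A minimal sketch under that assumption — the polled `status` flag and the delay interval are illustrative:

```c
#include <linux/irq.h>          /* touch_nmi_watchdog() */
#include <linux/delay.h>        /* udelay() */

/* Hypothetical busy-wait with interrupts disabled: pet the NMI
 * watchdog on every pass so the long spin is not flagged as a
 * lockup.  Compiles to a no-op without ARCH_HAS_NMI_WATCHDOG. */
static void wait_for_hw(volatile unsigned int *status)
{
        while (!*status) {
                touch_nmi_watchdog();
                udelay(100);
        }
}
```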
