From 8aa3448cabdfca146aa3fd36e852d0209fb2276a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 22 May 2004 08:10:11 -0700 Subject: [PATCH] rmap 39 add anon_vma rmap From: Hugh Dickins Andrea Arcangeli's anon_vma object-based reverse mapping scheme for anonymous pages. Instead of tracking anonymous pages by pte_chains or by mm, this tracks them by vma. But because vmas are frequently split and merged (particularly by mprotect), a page cannot point directly to its vma(s), but instead to an anon_vma list of those vmas likely to contain the page - a list on which vmas can easily be linked and unlinked as they come and go. The vmas on one list are all related, either by forking or by splitting. This has three particular advantages over anonmm: that it can cope effortlessly with mremap moves; and no longer needs page_table_lock to protect an mm's vma tree, since try_to_unmap finds vmas via page -> anon_vma -> vma instead of using find_vma; and should use less cpu for swapout since it can locate its anonymous vmas more quickly. It does have disadvantages too: a lot more change in mmap.c to deal with anon_vmas, though small straightforward additions now that the vma merging has been refactored there; more lowmem needed for each anon_vma and vma structure; an additional restriction on the merging of vmas (cannot be merged if already assigned different anon_vmas, since then their pages will be pointing to different heads). (There would be no need to enlarge the vma structure if anonymous pages belonged only to anonymous vmas; but private file mappings accumulate anonymous pages by copy-on-write, so need to be listed in both anon_vma and prio_tree at the same time. A different implementation could avoid that by using anon_vmas only for purely anonymous vmas, and use the existing prio_tree to locate cow pages - but that would involve a long search for each single private copy, probably not a good idea.) 
Where before the vm_pgoff of a purely anonymous (not file-backed) vma was meaningless, now it represents the virtual start address at which that vma is mapped - which the standard file pgoff manipulations treat linearly as vmas are split and merged. But if mremap moves the vma, then it generally carries its original vm_pgoff to the new location, so pages shared with the old location can still be found. Magic. Hugh has massaged it somewhat: building on the earlier rmap patches, this patch is a fifth of the size of Andrea's original anon_vma patch. Please note that this posting will be his first sight of this patch, which he may or may not approve. --- include/linux/mm.h | 20 +++++++++++++++-- include/linux/rmap.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c9b01a220b21..ec7c6767aae7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include <linux/fs.h> struct mempolicy; +struct anon_vma; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -78,6 +79,15 @@ struct vm_area_struct { struct prio_tree_node prio_tree_node; } shared; + /* + * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma + * list, after a COW of one of the file pages. A MAP_SHARED vma + * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack + * or brk vma (with NULL file) can only be in an anon_vma list. + */ + struct list_head anon_vma_node; /* Serialized by anon_vma->lock */ + struct anon_vma *anon_vma; /* Serialized by page_table_lock */ + /* Function pointers to deal with this struct. */ struct vm_operations_struct * vm_ops; @@ -201,7 +211,12 @@ struct page { * if PagePrivate set; used for * swp_entry_t if PageSwapCache */ - struct address_space *mapping; /* The inode (or ...) we belong to. 
 */ + struct address_space *mapping; /* If PG_anon clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, PG_anon is set, and + * it points to anon_vma object. + */ pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -610,7 +625,8 @@ extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, - unsigned long vm_flags, struct file *, pgoff_t, struct mempolicy *); + unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, + struct mempolicy *); extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 620987fa9607..e3148341f476 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -2,18 +2,75 @@ #define _LINUX_RMAP_H /* * Declarations for Reverse Mapping functions in mm/rmap.c - * Its structures are declared within that file. */ #include <linux/config.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/spinlock.h> #define page_map_lock(page) \ bit_spin_lock(PG_maplock, (unsigned long *)&(page)->flags) #define page_map_unlock(page) \ bit_spin_unlock(PG_maplock, (unsigned long *)&(page)->flags) +/* + * The anon_vma heads a list of private "related" vmas, to scan if + * an anonymous page pointing to this anon_vma needs to be unmapped: + * the vmas on the list will be related by forking, or by splitting. + * + * Since vmas come and go as they are split and merged (particularly + * in mprotect), the mapping field of an anonymous page cannot point + * directly to a vma: instead it points to an anon_vma, on whose list + * the related vmas can be easily linked or unlinked. 
+ * + * After unlinking the last vma on the list, we must garbage collect + * the anon_vma object itself: we're guaranteed no page can be + * pointing to this anon_vma once its vma list is empty. + */ +struct anon_vma { + spinlock_t lock; /* Serialize access to vma list */ + struct list_head head; /* List of private "related" vmas */ +}; + #ifdef CONFIG_MMU +extern kmem_cache_t *anon_vma_cachep; + +static inline struct anon_vma *anon_vma_alloc(void) +{ + return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL); +} + +static inline void anon_vma_free(struct anon_vma *anon_vma) +{ + kmem_cache_free(anon_vma_cachep, anon_vma); +} + +static inline void anon_vma_lock(struct vm_area_struct *vma) +{ + struct anon_vma *anon_vma = vma->anon_vma; + if (anon_vma) + spin_lock(&anon_vma->lock); +} + +static inline void anon_vma_unlock(struct vm_area_struct *vma) +{ + struct anon_vma *anon_vma = vma->anon_vma; + if (anon_vma) + spin_unlock(&anon_vma->lock); +} + +/* + * anon_vma helper functions. + */ +void anon_vma_init(void); /* create anon_vma_cachep */ +int anon_vma_prepare(struct vm_area_struct *); +void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *); +void anon_vma_unlink(struct vm_area_struct *); +void anon_vma_link(struct vm_area_struct *); +void __anon_vma_link(struct vm_area_struct *); + /* * rmap interfaces called when adding or removing pte of page */ @@ -43,6 +100,10 @@ int try_to_unmap(struct page *); #else /* !CONFIG_MMU */ +#define anon_vma_init() do {} while (0) +#define anon_vma_prepare(vma) (0) +#define anon_vma_link(vma) do {} while (0) + #define page_referenced(page) TestClearPageReferenced(page) #define try_to_unmap(page) SWAP_FAIL -- cgit v1.2.3