author    Andrew Morton <akpm@digeo.com>    2003-02-05 16:57:54 -0800
committer Linus Torvalds <torvalds@home.transmeta.com>    2003-02-05 16:57:54 -0800
commit    eefb08ee7da81e1548ffd5b664682dc5b229ddc2 (patch)
tree      ef68ebf52e6c836fa679d0f9a49dec6b934d9057 /include/linux
parent    6725839b5daa05f79f74d3088d7ff380eb77504e (diff)
[PATCH] Infrastructure for correct hugepage refcounting
We currently have a problem when things like ptrace, futexes and direct-io
try to pin user pages.  If the user's address is in a huge page we're
elevating the refcount of a constituent 4k page, not the head page of the
high-order allocation unit.

To solve this, a generic way of handling higher-order pages has been
implemented:

- A higher-order page is called a "compound page".  The name was chosen
  because "huge page", "large page", "super page", etc all seem to mean
  different things to different people.

- The first (controlling) 4k page of a compound page is referred to as the
  "head" page.

- The remaining pages are tail pages.

All pages have PG_compound set and have their lru.next pointing at the head
page (even the head page has this).  The head page's lru.prev, if non-zero,
holds the address of the compound page's put_page() function.  The order of
the allocation is stored in the first tail page's lru.prev.  This is only
for debug at present, and this usage means that zero-order pages may not be
compound.

The above relationships are established for _all_ higher-order pages in the
page allocator, which has some cost, but not much - another atomic op
during fork(), mainly.  This functionality is only enabled under
CONFIG_HUGETLB_PAGE, although it could be turned on permanently; there is a
little extra cost in get_page()/put_page().

These changes do not preclude adding compound pages to the LRU in the
future - we can add a new page flag to the head page and then move all the
additional data to the first tail page's lru.next, lru.prev, list.next,
list.prev, index, private, etc.
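As a concrete illustration of the layout described above, here is a minimal
sketch of how the page allocator could wire up a fresh higher-order
allocation.  The function name prep_compound_page and its call site are
assumptions for illustration; only the field usage follows the changelog.

    static void prep_compound_page(struct page *page, unsigned long order)
    {
    	int i;
    	int nr_pages = 1 << order;

    	/* Debug only: stash the order in the first tail page's lru.prev. */
    	page[1].lru.prev = (void *)order;

    	for (i = 0; i < nr_pages; i++) {
    		struct page *p = page + i;

    		SetPageCompound(p);		/* every constituent page... */
    		p->lru.next = (void *)page;	/* ...points at the head page */
    	}
    	/* The head page's lru.prev stays zero unless a destructor is
    	 * installed by the compound page's owner (see put_page() below). */
    }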
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/mm.h           35
-rw-r--r--  include/linux/page-flags.h    7
2 files changed, 39 insertions, 3 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d2b99c852301..c68771c27d88 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -208,24 +208,55 @@ struct page {
* Also, many kernel routines increase the page count before a critical
* routine so they can be sure the page doesn't go away from under them.
*/
-#define get_page(p) atomic_inc(&(p)->count)
-#define __put_page(p) atomic_dec(&(p)->count)
#define put_page_testzero(p) \
({ \
BUG_ON(page_count(page) == 0); \
atomic_dec_and_test(&(p)->count); \
})
+
#define page_count(p) atomic_read(&(p)->count)
#define set_page_count(p,v) atomic_set(&(p)->count, v)
+#define __put_page(p) atomic_dec(&(p)->count)
extern void FASTCALL(__page_cache_release(struct page *));
+#ifdef CONFIG_HUGETLB_PAGE
+
+static inline void get_page(struct page *page)
+{
+ if (PageCompound(page))
+ page = (struct page *)page->lru.next;
+ atomic_inc(&page->count);
+}
+
static inline void put_page(struct page *page)
{
+ if (PageCompound(page)) {
+ page = (struct page *)page->lru.next;
+ if (page->lru.prev) { /* destructor? */
+ (*(void (*)(struct page *))page->lru.prev)(page);
+ return;
+ }
+ }
if (!PageReserved(page) && put_page_testzero(page))
__page_cache_release(page);
}
+#else /* CONFIG_HUGETLB_PAGE */
+
+static inline void get_page(struct page *page)
+{
+ atomic_inc(&page->count);
+}
+
+static inline void put_page(struct page *page)
+{
+ if (!PageReserved(page) && put_page_testzero(page))
+ __page_cache_release(page);
+}
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
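The destructor hook in the CONFIG_HUGETLB_PAGE put_page() above is what
lets a hugepage driver reclaim the whole high-order unit at once.  A
hypothetical user might implement and install it along these lines
(free_huge_page and the freeing policy here are assumptions; the patch
only fixes the calling convention, i.e. the function pointer in the head
page's lru.prev):

    /* Hypothetical destructor: put_page() calls this with the head page. */
    static void free_huge_page(struct page *page)
    {
    	if (put_page_testzero(page)) {
    		/* order was stashed (debug only) in the first tail page */
    		unsigned long order = (unsigned long)page[1].lru.prev;

    		/* A real destructor would also tear down the compound
    		 * state, or return the unit to a hugepage pool instead. */
    		__free_pages(page, order);
    	}
    }

    /* Installed at allocation time, in the head page only: */
    page->lru.prev = (void *)free_huge_page;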
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0327a8421c9d..5c3bded564d8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -72,7 +72,8 @@
#define PG_direct 16 /* ->pte_chain points directly at pte */
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
-#define PG_reclaim 18 /* To be recalimed asap */
+#define PG_reclaim 18 /* To be reclaimed asap */
+#define PG_compound 19 /* Part of a compound page */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -251,6 +252,10 @@ extern void get_full_page_state(struct page_state *ret);
#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags)
#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
+#define PageCompound(page) test_bit(PG_compound, &(page)->flags)
+#define SetPageCompound(page) set_bit(PG_compound, &(page)->flags)
+#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags)
+
/*
* The PageSwapCache predicate doesn't use a PG_flag at this time,
* but it may again do so one day.
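With these predicates in place, any code holding a constituent page can
recover the head page and the allocation order from the relationships
established above.  Later kernels grew helpers along these lines; the names
and bodies below are purely illustrative, not part of this patch:

    /* Illustrative helper: resolve any constituent page to its head page. */
    static inline struct page *compound_head_page(struct page *page)
    {
    	if (PageCompound(page))
    		return (struct page *)page->lru.next;
    	return page;
    }

    /* Illustrative helper: recover the allocation order. */
    static inline unsigned long compound_page_order(struct page *page)
    {
    	if (!PageCompound(page))
    		return 0;	/* zero-order pages may not be compound */
    	/* the order lives in the first tail page's lru.prev (debug only) */
    	return (unsigned long)compound_head_page(page)[1].lru.prev;
    }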