author    David S. Miller <davem@nuts.ninka.net>  2002-09-16 07:18:35 -0700
committer David S. Miller <davem@nuts.ninka.net>  2002-09-16 07:18:35 -0700
commit    a0c2011a0a823f300e7d1e12dfd408ff5b96b329 (patch)
tree      d2f6f202899ff807dfa58f47adcedb7fee4de037
parent    4ec5e293040467ce1e9a4cd3666eedb4dd7d8584 (diff)
[SPARC]: First cut of hugetlb on sparc64, 4MB currently.
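
For illustration, a minimal userspace exercise of the two new system calls. This test program is not part of the commit; the syscall numbers come from the unistd.h hunks below, and both allocation paths require CAP_SYS_ADMIN or membership in group 0, so run it as root:

    /* Hypothetical test program, not included in this commit. */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <sys/mman.h>

    #define __NR_alloc_hugepages 234  /* from the unistd.h changes below */
    #define __NR_free_hugepages  235

    int main(void)
    {
            unsigned long len = 2UL * 4 * 1024 * 1024; /* two 4MB huge pages */
            long addr;

            /* key == 0 requests a private mapping; key > 0 would name a
             * shared segment (pass IPC_CREAT in flags to create it).
             */
            addr = syscall(__NR_alloc_hugepages, 0, 0UL, len,
                           PROT_READ | PROT_WRITE, 0);
            if (addr < 0) {
                    perror("alloc_hugepages");
                    return 1;
            }
            memset((void *) addr, 0xaa, len);

            /* Must pass the exact start address of the hugetlb VMA. */
            return syscall(__NR_free_hugepages, (unsigned long) addr);
    }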
-rw-r--r--  arch/sparc64/Config.help         |   8
-rw-r--r--  arch/sparc64/config.in           |   2
-rw-r--r--  arch/sparc64/kernel/sys_sparc.c  |  97
-rw-r--r--  arch/sparc64/kernel/systbls.S    |   4
-rw-r--r--  arch/sparc64/mm/Makefile         |   2
-rw-r--r--  arch/sparc64/mm/hugetlbpage.c    | 621
-rw-r--r--  arch/sparc64/mm/init.c           |  33
-rw-r--r--  include/asm-sparc/unistd.h       |   4
-rw-r--r--  include/asm-sparc64/page.h       |  10
-rw-r--r--  include/asm-sparc64/unistd.h     |   4
10 files changed, 779 insertions(+), 6 deletions(-)
diff --git a/arch/sparc64/Config.help b/arch/sparc64/Config.help
index 3d994d76fd7a..5d16e501547c 100644
--- a/arch/sparc64/Config.help
+++ b/arch/sparc64/Config.help
@@ -25,6 +25,14 @@ CONFIG_SMP
If you don't know what to do here, say N.
+CONFIG_HUGETLB_PAGE
+ This enables support for huge pages. User space applications
+ can make use of this support with the sys_alloc_hugepages and
+ sys_free_hugepages system calls. If your applications are
+ huge page aware, then say Y here.
+
+ Otherwise, say N.
+
CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to
real-time or interactive events by allowing a low priority process to
diff --git a/arch/sparc64/config.in b/arch/sparc64/config.in
index bdf5ee97c0bf..1ec0d525839d 100644
--- a/arch/sparc64/config.in
+++ b/arch/sparc64/config.in
@@ -15,6 +15,8 @@ define_bool CONFIG_VT y
define_bool CONFIG_VT_CONSOLE y
define_bool CONFIG_HW_CONSOLE y
+bool 'SPARC64 Huge TLB Page Support' CONFIG_HUGETLB_PAGE
+
bool 'Symmetric multi-processing support' CONFIG_SMP
bool 'Preemptible Kernel' CONFIG_PREEMPT
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 4ce757bf8b8d..d459c927b660 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -681,3 +681,100 @@ sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long ar
};
return err;
}
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_ALIGN(x) (((unsigned long)(x) + (HPAGE_SIZE - 1)) & HPAGE_MASK)
+extern long sys_munmap(unsigned long, size_t);
+
+/* get_addr finds a currently unused virtual range in the current
+ * process's address space.  On success it returns an HPAGE_SIZE
+ * aligned address; the generic kernel routines can only guarantee
+ * that an allocated address is PAGE_SIZE aligned.
+ */
+static long get_addr(unsigned long addr, unsigned long len)
+{
+ struct vm_area_struct *vma;
+ if (addr) {
+ addr = HPAGE_ALIGN(addr);
+ vma = find_vma(current->mm, addr);
+ if (((TASK_SIZE - len) >= addr) &&
+ (!vma || addr + len <= vma->vm_start))
+ goto found_addr;
+ }
+ addr = HPAGE_ALIGN(TASK_UNMAPPED_BASE);
+ for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
+ if (TASK_SIZE - len < addr)
+ return -ENOMEM;
+ if (!vma || ((addr + len) < vma->vm_start))
+ goto found_addr;
+ addr = vma->vm_end;
+ }
+found_addr:
+ addr = HPAGE_ALIGN(addr);
+ return addr;
+}
+
+extern int alloc_hugetlb_pages(int, unsigned long, unsigned long, int, int);
+
+asmlinkage long
+sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long raddr;
+ int retval;
+
+ if (key < 0)
+ return -EINVAL;
+ if (len & (HPAGE_SIZE - 1))
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ raddr = get_addr(addr, len);
+ retval = 0;
+ if (raddr == -ENOMEM) {
+ retval = -ENOMEM;
+ goto raddr_out;
+ }
+ retval = alloc_hugetlb_pages(key, raddr, len, prot, flag);
+
+raddr_out:
+ up_write(&mm->mmap_sem);
+ if (retval < 0)
+ return (long) retval;
+
+ return raddr;
+}
+
+extern int free_hugepages(struct vm_area_struct *);
+
+asmlinkage int
+sys_free_hugepages(unsigned long addr)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int retval;
+
+ vma = find_vma(current->mm, addr);
+ if (!vma || !is_vm_hugetlb_page(vma) || vma->vm_start != addr)
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ spin_lock(&mm->page_table_lock);
+ retval = free_hugepages(vma);
+ spin_unlock(&mm->page_table_lock);
+ up_write(&mm->mmap_sem);
+ return retval;
+}
+
+#else
+
+asmlinkage long
+sys_alloc_hugepages(int key, unsigned long addr, size_t len, int prot, int flag)
+{
+ return -ENOSYS;
+}
+asmlinkage int
+sys_free_hugepages(unsigned long addr)
+{
+ return -ENOSYS;
+}
+
+#endif
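
A quick standalone check of the HPAGE_ALIGN round-up idiom that get_addr() relies on above; the constants are copied from the page.h hunk below, the test values are arbitrary:

    /* Standalone sketch of the HPAGE_ALIGN round-up, not kernel code. */
    #include <assert.h>

    #define HPAGE_SHIFT 22
    #define HPAGE_SIZE  (1UL << HPAGE_SHIFT)  /* 4MB */
    #define HPAGE_MASK  (~(HPAGE_SIZE - 1UL))
    #define HPAGE_ALIGN(x) (((unsigned long)(x) + (HPAGE_SIZE - 1)) & HPAGE_MASK)

    int main(void)
    {
            assert(HPAGE_ALIGN(0x400000UL) == 0x400000UL); /* already aligned */
            assert(HPAGE_ALIGN(0x400001UL) == 0x800000UL); /* rounds up to next 4MB */
            assert((HPAGE_ALIGN(0x12345678UL) & ~HPAGE_MASK) == 0); /* always aligned */
            return 0;
    }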
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 0aaced635a7d..2b75166d2db5 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -65,8 +65,8 @@ sys_call_table32:
.word sys32_ipc, sys32_sigreturn, sys_clone, sys_nis_syscall, sys32_adjtimex
/*220*/ .word sys32_sigprocmask, sys32_create_module, sys32_delete_module, sys32_get_kernel_syms, sys_getpgid
.word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys32_setfsuid16, sys32_setfsgid16
-/*230*/ .word sys32_select, sys_time, sys_nis_syscall, sys_stime, sys_nis_syscall
- .word sys_nis_syscall, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+/*230*/ .word sys32_select, sys_time, sys_nis_syscall, sys_stime, sys_alloc_hugepages
+ .word sys_free_hugepages, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
/*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys32_sched_rr_get_interval, sys32_nanosleep
/*250*/ .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, sys32_nfsservctl
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index c56fef2d13d9..147b1b5394c8 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -7,4 +7,6 @@ EXTRA_AFLAGS := -ansi
O_TARGET := mm.o
obj-y := ultra.o fault.o init.o generic.o extable.o modutil.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
include $(TOPDIR)/Rules.make
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..179be0e7ab15
--- /dev/null
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -0,0 +1,621 @@
+/*
+ * SPARC64 Huge TLB page support.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+static struct vm_operations_struct hugetlb_vm_ops;
+struct list_head htlbpage_freelist;
+spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
+extern long htlbpagemem;
+
+static void zap_hugetlb_resources(struct vm_area_struct *);
+
+#define MAX_ID 32
+struct htlbpagekey {
+ struct inode *in;
+ int key;
+} htlbpagek[MAX_ID];
+
+static struct inode *find_key_inode(int key)
+{
+ int i;
+
+ for (i = 0; i < MAX_ID; i++) {
+ if (htlbpagek[i].key == key)
+ return htlbpagek[i].in;
+ }
+ return NULL;
+}
+
+static struct page *alloc_hugetlb_page(void)
+{
+ struct list_head *curr, *head;
+ struct page *page;
+
+ spin_lock(&htlbpage_lock);
+
+ head = &htlbpage_freelist;
+ curr = head->next;
+
+ if (curr == head) {
+ spin_unlock(&htlbpage_lock);
+ return NULL;
+ }
+ page = list_entry(curr, struct page, list);
+ list_del(curr);
+ htlbpagemem--;
+
+ spin_unlock(&htlbpage_lock);
+
+ set_page_count(page, 1);
+ memset(page_address(page), 0, HPAGE_SIZE);
+
+ return page;
+}
+
+static void free_hugetlb_page(struct page *page)
+{
+ spin_lock(&htlbpage_lock);
+ if ((page->mapping != NULL) && (page_count(page) == 2)) {
+ struct inode *inode = page->mapping->host;
+ int i;
+
+ ClearPageDirty(page);
+ remove_from_page_cache(page);
+ set_page_count(page, 1);
+ if ((inode->i_size -= HPAGE_SIZE) == 0) {
+ for (i = 0; i < MAX_ID; i++) {
+ if (htlbpagek[i].key == inode->i_ino) {
+ htlbpagek[i].key = 0;
+ htlbpagek[i].in = NULL;
+ break;
+ }
+ }
+ kfree(inode);
+ }
+ }
+ if (put_page_testzero(page)) {
+ list_add(&page->list, &htlbpage_freelist);
+ htlbpagemem++;
+ }
+ spin_unlock(&htlbpage_lock);
+}
+
+static pte_t *huge_pte_alloc_map(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd) {
+ pmd = pmd_alloc(mm, pgd, addr);
+ if (pmd)
+ pte = pte_alloc_map(mm, pmd, addr);
+ }
+ return pte;
+}
+
+static pte_t *huge_pte_offset_map(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd) {
+ pmd = pmd_offset(pgd, addr);
+ if (pmd)
+ pte = pte_offset_map(pmd, addr);
+ }
+ return pte;
+}
+
+static pte_t *huge_pte_offset_map_nested(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte = NULL;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd) {
+ pmd = pmd_offset(pgd, addr);
+ if (pmd)
+ pte = pte_offset_map_nested(pmd, addr);
+ }
+ return pte;
+}
+
+#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZ4MB; } while (0)
+
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, pte_t * page_table, int write_access)
+{
+ pte_t entry;
+ unsigned long i;
+
+ mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+
+ for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+ if (write_access)
+ entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
+ vma->vm_page_prot)));
+ else
+ entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+ entry = pte_mkyoung(entry);
+ mk_pte_huge(entry);
+ pte_val(entry) += (i << PAGE_SHIFT);
+ set_pte(page_table, entry);
+ page_table++;
+ }
+}
+
+static int anon_get_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ int write_access, pte_t * page_table)
+{
+ struct page *page;
+
+ page = alloc_hugetlb_page();
+ if (page == NULL)
+ return -1;
+ set_huge_pte(mm, vma, page, page_table, write_access);
+ return 1;
+}
+
+static int
+make_hugetlb_pages_present(unsigned long addr, unsigned long end, int flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ pte_t *pte;
+ int write;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto out_error1;
+
+ write = (vma->vm_flags & VM_WRITE) != 0;
+ if ((vma->vm_end - vma->vm_start) & (HPAGE_SIZE - 1))
+ goto out_error1;
+
+ spin_lock(&mm->page_table_lock);
+ do {
+ int err;
+
+ pte = huge_pte_alloc_map(mm, addr);
+ err = (!pte ||
+ !pte_none(*pte) ||
+ (anon_get_hugetlb_page(mm, vma,
+ write ? VM_WRITE : VM_READ,
+ pte) == -1));
+ if (pte)
+ pte_unmap(pte);
+ if (err)
+ goto out_error;
+
+ addr += HPAGE_SIZE;
+ } while (addr < end);
+ spin_unlock(&mm->page_table_lock);
+
+ vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
+ if (flags & MAP_PRIVATE)
+ vma->vm_flags |= VM_DONTCOPY;
+ vma->vm_ops = &hugetlb_vm_ops;
+ return 0;
+
+out_error:
+ if (addr > vma->vm_start) {
+ vma->vm_end = addr;
+ flush_cache_range(vma, vma->vm_start, vma->vm_end);
+ zap_hugetlb_resources(vma);
+ flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+ vma->vm_end = end;
+ }
+ spin_unlock(&mm->page_table_lock);
+ out_error1:
+ return -1;
+}
+
+int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+ struct vm_area_struct *vma)
+{
+ pte_t *src_pte, *dst_pte, entry;
+ struct page *ptepage;
+ unsigned long addr = vma->vm_start;
+ unsigned long end = vma->vm_end;
+
+ while (addr < end) {
+ unsigned long i;
+
+ dst_pte = huge_pte_alloc_map(dst, addr);
+ if (!dst_pte)
+ goto nomem;
+
+ src_pte = huge_pte_offset_map_nested(src, addr);
+ entry = *src_pte;
+ pte_unmap_nested(src_pte);
+
+ ptepage = pte_page(entry);
+ get_page(ptepage);
+ for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+ set_pte(dst_pte, entry);
+ pte_val(entry) += PAGE_SIZE;
+ dst_pte++;
+ }
+ pte_unmap(dst_pte - (1 << HUGETLB_PAGE_ORDER));
+
+ dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ addr += HPAGE_SIZE;
+ }
+ return 0;
+
+nomem:
+ return -ENOMEM;
+}
+
+int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page **pages, struct vm_area_struct **vmas,
+ unsigned long *st, int *length, int i)
+{
+ pte_t *ptep, pte;
+ unsigned long start = *st;
+ unsigned long pstart;
+ int len = *length;
+ struct page *page;
+
+ do {
+ pstart = start;
+ ptep = huge_pte_offset_map(mm, start);
+ pte = *ptep;
+
+back1:
+ page = pte_page(pte);
+ if (pages) {
+ page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
+ pages[i] = page;
+ }
+ if (vmas)
+ vmas[i] = vma;
+ i++;
+ len--;
+ start += PAGE_SIZE;
+ if (((start & HPAGE_MASK) == pstart) && len &&
+ (start < vma->vm_end))
+ goto back1;
+
+ pte_unmap(ptep);
+ } while (len && start < vma->vm_end);
+
+ *length = len;
+ *st = start;
+ return i;
+}
+
+static void zap_hugetlb_resources(struct vm_area_struct *mpnt)
+{
+ struct mm_struct *mm = mpnt->vm_mm;
+ unsigned long len, addr, end;
+ struct page *page;
+ pte_t *ptep;
+
+ addr = mpnt->vm_start;
+ end = mpnt->vm_end;
+ len = end - addr;
+ do {
+ unsigned long i;
+
+ ptep = huge_pte_offset_map(mm, addr);
+ page = pte_page(*ptep);
+ for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+ pte_clear(ptep);
+ ptep++;
+ }
+ pte_unmap(ptep - (1 << HUGETLB_PAGE_ORDER));
+ free_hugetlb_page(page);
+ addr += HPAGE_SIZE;
+ } while (addr < end);
+ mm->rss -= (len >> PAGE_SHIFT);
+ mpnt->vm_ops = NULL;
+}
+
+static void unlink_vma(struct vm_area_struct *mpnt)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ vma = mm->mmap;
+ if (vma == mpnt) {
+ mm->mmap = vma->vm_next;
+ } else {
+ while (vma->vm_next != mpnt) {
+ vma = vma->vm_next;
+ }
+ vma->vm_next = mpnt->vm_next;
+ }
+ rb_erase(&mpnt->vm_rb, &mm->mm_rb);
+ mm->mmap_cache = NULL;
+ mm->map_count--;
+}
+
+int free_hugepages(struct vm_area_struct *mpnt)
+{
+ unlink_vma(mpnt);
+
+ flush_cache_range(mpnt, mpnt->vm_start, mpnt->vm_end);
+ zap_hugetlb_resources(mpnt);
+ flush_tlb_range(mpnt, mpnt->vm_start, mpnt->vm_end);
+
+ kmem_cache_free(vm_area_cachep, mpnt);
+ return 1;
+}
+
+static struct inode *set_new_inode(unsigned long len, int prot, int flag, int key)
+{
+ struct inode *inode;
+ int i;
+
+ for (i = 0; i < MAX_ID; i++) {
+ if (htlbpagek[i].key == 0)
+ break;
+ }
+ if (i == MAX_ID)
+ return NULL;
+ inode = kmalloc(sizeof (struct inode), GFP_KERNEL);
+ if (inode == NULL)
+ return NULL;
+
+ inode_init_once(inode);
+ atomic_inc(&inode->i_writecount);
+ inode->i_mapping = &inode->i_data;
+ inode->i_mapping->host = inode;
+ inode->i_ino = (unsigned long)key;
+
+ htlbpagek[i].key = key;
+ htlbpagek[i].in = inode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_mode = prot;
+ inode->i_size = len;
+ return inode;
+}
+
+static int check_size_prot(struct inode *inode, unsigned long len, int prot, int flag)
+{
+ if (inode->i_uid != current->fsuid)
+ return -1;
+ if (inode->i_gid != current->fsgid)
+ return -1;
+ if (inode->i_size != len)
+ return -1;
+ return 0;
+}
+
+static int alloc_shared_hugetlb_pages(int key, unsigned long addr, unsigned long len,
+ int prot, int flag)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct inode *inode;
+ struct address_space *mapping;
+ struct page *page;
+ int idx;
+ int retval = -ENOMEM;
+ int newalloc = 0;
+
+try_again:
+ spin_lock(&htlbpage_lock);
+
+ inode = find_key_inode(key);
+ if (inode == NULL) {
+ if (!capable(CAP_SYS_ADMIN)) {
+ if (!in_group_p(0)) {
+ retval = -EPERM;
+ goto out_err;
+ }
+ }
+ if (!(flag & IPC_CREAT)) {
+ retval = -ENOENT;
+ goto out_err;
+ }
+ inode = set_new_inode(len, prot, flag, key);
+ if (inode == NULL)
+ goto out_err;
+ newalloc = 1;
+ } else {
+ if (check_size_prot(inode, len, prot, flag) < 0) {
+ retval = -EINVAL;
+ goto out_err;
+ } else if (atomic_read(&inode->i_writecount)) {
+ spin_unlock(&htlbpage_lock);
+ goto try_again;
+ }
+ }
+ spin_unlock(&htlbpage_lock);
+ mapping = inode->i_mapping;
+
+ addr = do_mmap_pgoff(NULL, addr, len, (unsigned long) prot,
+ MAP_NORESERVE|MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *) addr))
+ goto freeinode;
+
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ retval = -EINVAL;
+ goto freeinode;
+ }
+
+ spin_lock(&mm->page_table_lock);
+ do {
+ pte_t *pte = huge_pte_alloc_map(mm, addr);
+
+ if (!pte || !pte_none(*pte)) {
+ if (pte)
+ pte_unmap(pte);
+ goto out;
+ }
+
+ idx = (addr - vma->vm_start) >> HPAGE_SHIFT;
+ page = find_get_page(mapping, idx);
+ if (page == NULL) {
+ page = alloc_hugetlb_page();
+ if (page == NULL) {
+ pte_unmap(pte);
+ goto out;
+ }
+ add_to_page_cache(page, mapping, idx);
+ }
+ set_huge_pte(mm, vma, page, pte,
+ (vma->vm_flags & VM_WRITE));
+ pte_unmap(pte);
+
+ addr += HPAGE_SIZE;
+ } while (addr < vma->vm_end);
+
+ retval = 0;
+ vma->vm_flags |= (VM_HUGETLB | VM_RESERVED);
+ vma->vm_ops = &hugetlb_vm_ops;
+ spin_unlock(&mm->page_table_lock);
+ spin_lock(&htlbpage_lock);
+ atomic_set(&inode->i_writecount, 0);
+ spin_unlock(&htlbpage_lock);
+
+ return retval;
+
+out:
+ if (addr > vma->vm_start) {
+ unsigned long raddr;
+ raddr = vma->vm_end;
+ vma->vm_end = addr;
+
+ flush_cache_range(vma, vma->vm_start, vma->vm_end);
+ zap_hugetlb_resources(vma);
+ flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+
+ vma->vm_end = raddr;
+ }
+ spin_unlock(&mm->page_table_lock);
+ do_munmap(mm, vma->vm_start, len);
+ if (newalloc)
+ goto freeinode;
+
+ return retval;
+
+out_err:
+ spin_unlock(&htlbpage_lock);
+
+freeinode:
+ if (newalloc) {
+ for (idx = 0; idx < MAX_ID; idx++) {
+ if (htlbpagek[idx].key == inode->i_ino) {
+ htlbpagek[idx].key = 0;
+ htlbpagek[idx].in = NULL;
+ break;
+ }
+ }
+ kfree(inode);
+ }
+ return retval;
+}
+
+static int alloc_private_hugetlb_pages(int key, unsigned long addr, unsigned long len,
+ int prot, int flag)
+{
+ if (!capable(CAP_SYS_ADMIN)) {
+ if (!in_group_p(0))
+ return -EPERM;
+ }
+ addr = do_mmap_pgoff(NULL, addr, len, prot,
+ MAP_NORESERVE|MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *) addr))
+ return -ENOMEM;
+ if (make_hugetlb_pages_present(addr, (addr + len), flag) < 0) {
+ do_munmap(current->mm, addr, len);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int alloc_hugetlb_pages(int key, unsigned long addr, unsigned long len, int prot,
+ int flag)
+{
+ if (key > 0)
+ return alloc_shared_hugetlb_pages(key, addr, len, prot, flag);
+ return alloc_private_hugetlb_pages(key, addr, len, prot, flag);
+}
+
+extern long htlbzone_pages;
+extern struct list_head htlbpage_freelist;
+
+int set_hugetlb_mem_size(int count)
+{
+ int j, lcount;
+ struct page *page, *map;
+
+ if (count < 0)
+ lcount = count;
+ else
+ lcount = count - htlbzone_pages;
+
+ if (lcount > 0) { /* Increase the mem size. */
+ while (lcount--) {
+ page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
+ if (page == NULL)
+ break;
+ map = page;
+ for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
+ SetPageReserved(map);
+ map++;
+ }
+ spin_lock(&htlbpage_lock);
+ list_add(&page->list, &htlbpage_freelist);
+ htlbpagemem++;
+ htlbzone_pages++;
+ spin_unlock(&htlbpage_lock);
+ }
+ return (int) htlbzone_pages;
+ }
+
+ /* Shrink the memory size. */
+ while (lcount++) {
+ page = alloc_hugetlb_page();
+ if (page == NULL)
+ break;
+
+ spin_lock(&htlbpage_lock);
+ htlbzone_pages--;
+ spin_unlock(&htlbpage_lock);
+
+ map = page;
+ for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
+ map->flags &= ~(1UL << PG_locked | 1UL << PG_error |
+ 1UL << PG_referenced |
+ 1UL << PG_dirty | 1UL << PG_active |
+ 1UL << PG_reserved |
+ 1UL << PG_private | 1UL << PG_writeback);
+ set_page_count(page, 0);
+ map++;
+ }
+ set_page_count(page, 1);
+ __free_pages(page, HUGETLB_PAGE_ORDER);
+ }
+ return (int) htlbzone_pages;
+}
+
+static struct vm_operations_struct hugetlb_vm_ops = {
+ .close = zap_hugetlb_resources,
+};
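
To make the PTE layout concrete: set_huge_pte() above expands one 4MB page into 512 consecutive 8K PTEs, each tagged with the _PAGE_SZ4MB size bit and advanced by one base page of physical address, so a TLB miss anywhere in the range installs the same 4MB translation. A userspace model of that fan-out; the size-bit value here is a stand-in, the real _PAGE_SZ4MB lives in sparc64's pgtable.h:

    /* Model of the set_huge_pte() fan-out; FAKE_PAGE_SZ4MB is invented. */
    #include <stdio.h>

    #define PAGE_SHIFT         13
    #define HPAGE_SHIFT        22
    #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) /* 9 */
    #define FAKE_PAGE_SZ4MB    (1UL << 5) /* stand-in for _PAGE_SZ4MB */

    int main(void)
    {
            unsigned long paddr = 0x10000000UL; /* pretend 4MB-aligned physaddr */
            unsigned long i, entry;

            for (i = 0; i < (1UL << HUGETLB_PAGE_ORDER); i++) {
                    entry = (paddr | FAKE_PAGE_SZ4MB) + (i << PAGE_SHIFT);
                    if (i < 2 || i == (1UL << HUGETLB_PAGE_ORDER) - 1)
                            printf("pte[%3lu] -> %#lx\n", i, entry);
            }
            return 0;
    }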
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index b3233175628b..6410e974796a 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1690,6 +1690,13 @@ static void __init taint_real_pages(void)
}
}
+#ifdef CONFIG_HUGETLB_PAGE
+long htlbpagemem = 0;
+long htlbpage_max;
+long htlbzone_pages;
+extern struct list_head htlbpage_freelist;
+#endif
+
void __init mem_init(void)
{
unsigned long codepages, datapages, initpages;
@@ -1766,6 +1773,32 @@ void __init mem_init(void)
if (tlb_type == cheetah || tlb_type == cheetah_plus)
cheetah_ecache_flush_init();
+#ifdef CONFIG_HUGETLB_PAGE
+ {
+ long i, j;
+ struct page *page, *map;
+
+ /* For now, reserve a quarter of physical memory for huge pages. */
+ htlbzone_pages = (num_physpages >> ((HPAGE_SHIFT - PAGE_SHIFT) + 2));
+
+ /* Will make this settable on the kernel command line. */
+ INIT_LIST_HEAD(&htlbpage_freelist);
+ for (i = 0; i < htlbzone_pages; i++) {
+ page = alloc_pages(GFP_ATOMIC, HUGETLB_PAGE_ORDER);
+ if (page == NULL)
+ break;
+ map = page;
+ for (j = 0; j < (HPAGE_SIZE / PAGE_SIZE); j++) {
+ SetPageReserved(map);
+ map++;
+ }
+ list_add(&page->list, &htlbpage_freelist);
+ }
+ printk("Total Huge_TLB_Page memory pages allocated %ld\n", i);
+ htlbzone_pages = htlbpagemem = i;
+ htlbpage_max = i;
+ }
+#endif
}
void free_initmem (void)
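
The reservation above boils down to num_physpages >> 11: shifting by (HPAGE_SHIFT - PAGE_SHIFT) converts 8K base pages into 4MB huge pages, and the extra 2 takes one quarter of them. A throwaway sketch checking the arithmetic for a hypothetical 2GB machine:

    /* Check: a quarter of physical memory, expressed in 4MB pages. */
    #include <stdio.h>

    #define PAGE_SHIFT  13
    #define HPAGE_SHIFT 22

    int main(void)
    {
            unsigned long num_physpages = (2UL << 30) >> PAGE_SHIFT; /* 2GB in 8K pages */
            unsigned long htlbzone_pages =
                    num_physpages >> ((HPAGE_SHIFT - PAGE_SHIFT) + 2);

            /* 262144 base pages -> 128 huge pages -> 512MB, a quarter of 2GB. */
            printf("%lu huge pages (%lu MB)\n", htlbzone_pages,
                   htlbzone_pages << (HPAGE_SHIFT - 20));
            return 0;
    }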
diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h
index 5d3ba06ceb4c..f928fde5caee 100644
--- a/include/asm-sparc/unistd.h
+++ b/include/asm-sparc/unistd.h
@@ -249,8 +249,8 @@
#define __NR_time 231 /* Linux Specific */
/* #define __NR_oldstat 232 Linux Specific */
#define __NR_stime 233 /* Linux Specific */
-/* #define __NR_oldfstat 234 Linux Specific */
-/* #define __NR_phys 235 Linux Specific */
+#define __NR_alloc_hugepages 234 /* Linux Specific */
+#define __NR_free_hugepages 235 /* Linux Specific */
#define __NR__llseek 236 /* Linux Specific */
#define __NR_mlock 237
#define __NR_munlock 238
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index c01cafda59ee..df6a1057484a 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -3,6 +3,8 @@
#ifndef _SPARC64_PAGE_H
#define _SPARC64_PAGE_H
+#include <linux/config.h>
+
#define PAGE_SHIFT 13
#ifndef __ASSEMBLY__
/* I have my suspicions... -DaveM */
@@ -99,6 +101,14 @@ typedef unsigned long iopgprot_t;
#endif /* (STRICT_MM_TYPECHECKS) */
+#define HPAGE_SHIFT 22
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+#endif
+
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(0x0000000070000000UL) : (PAGE_OFFSET))
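
These constants are what the rest of the patch builds on: HPAGE_SHIFT of 22 against PAGE_SHIFT of 13 yields HUGETLB_PAGE_ORDER 9, i.e. 512 base pages per huge page, which is exactly the loop bound used throughout hugetlbpage.c. A compile-time cross-check, outside the patch itself:

    /* Compile-time cross-check of the page.h constants (not in the patch). */
    #define PAGE_SHIFT         13
    #define HPAGE_SHIFT        22
    #define HPAGE_SIZE         ((1UL) << HPAGE_SHIFT)
    #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)

    typedef char assert_4mb[(HPAGE_SIZE == 4UL * 1024 * 1024) ? 1 : -1];
    typedef char assert_order9[(HUGETLB_PAGE_ORDER == 9) ? 1 : -1];
    typedef char assert_512[((1UL << HUGETLB_PAGE_ORDER) == 512) ? 1 : -1];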
diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h
index d7eb4d220b79..2959d9071240 100644
--- a/include/asm-sparc64/unistd.h
+++ b/include/asm-sparc64/unistd.h
@@ -251,8 +251,8 @@
#endif
/* #define __NR_oldstat 232 Linux Specific */
#define __NR_stime 233 /* Linux Specific */
-/* #define __NR_oldfstat 234 Linux Specific */
-/* #define __NR_phys 235 Linux Specific */
+#define __NR_alloc_hugepages 234 /* Linux Specific */
+#define __NR_free_hugepages 235 /* Linux Specific */
#define __NR__llseek 236 /* Linux Specific */
#define __NR_mlock 237
#define __NR_munlock 238