diff options
Diffstat (limited to 'mm/memory.c')
| -rw-r--r-- | mm/memory.c | 82 |
1 files changed, 65 insertions, 17 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..aad432e71251 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -65,6 +65,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/shmem_fs.h>
 #include <linux/memory-tiers.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ fallback:
 		return ret;
 	}
 
+	if (!needs_fallback && vma->vm_file) {
+		struct address_space *mapping = vma->vm_file->f_mapping;
+		pgoff_t file_end;
+
+		file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+		/*
+		 * Do not allow to map with PTEs beyond i_size and with PMD
+		 * across i_size to preserve SIGBUS semantics.
+		 *
+		 * Make an exception for shmem/tmpfs that for long time
+		 * intentionally mapped with PMDs across i_size.
+		 */
+		needs_fallback = !shmem_mapping(mapping) &&
+			file_end < folio_next_index(folio);
+	}
+
 	if (pmd_none(*vmf->pmd)) {
-		if (folio_test_pmd_mappable(folio)) {
+		if (!needs_fallback && folio_test_pmd_mappable(folio)) {
 			ret = do_set_pmd(vmf, folio, page);
 			if (ret != VM_FAULT_FALLBACK)
 				return ret;
@@ -6116,6 +6134,45 @@ split:
 }
 
 /*
+ * The page faults may be spurious because of the racy access to the
+ * page table. For example, a non-populated virtual page is accessed
+ * on 2 CPUs simultaneously, thus the page faults are triggered on
+ * both CPUs. However, it's possible that one CPU (say CPU A) cannot
+ * find the reason for the page fault if the other CPU (say CPU B) has
+ * changed the page table before the PTE is checked on CPU A. Most of
+ * the time, the spurious page faults can be ignored safely. However,
+ * if the page fault is for the write access, it's possible that a
+ * stale read-only TLB entry exists in the local CPU and needs to be
+ * flushed on some architectures. This is called the spurious page
+ * fault fixing.
+ *
+ * Note: flush_tlb_fix_spurious_fault() is defined as flush_tlb_page()
+ * by default and used as such on most architectures, while
+ * flush_tlb_fix_spurious_fault_pmd() is defined as NOP by default and
+ * used as such on most architectures.
+ */
+static void fix_spurious_fault(struct vm_fault *vmf,
+			       enum pgtable_level ptlevel)
+{
+	/* Skip spurious TLB flush for retried page fault */
+	if (vmf->flags & FAULT_FLAG_TRIED)
+		return;
+	/*
+	 * This is needed only for protection faults but the arch code
+	 * is not yet telling us if this is a protection fault or not.
+	 * This still avoids useless tlb flushes for .text page faults
+	 * with threads.
+	 */
+	if (vmf->flags & FAULT_FLAG_WRITE) {
+		if (ptlevel == PGTABLE_LEVEL_PTE)
+			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
+						     vmf->pte);
+		else
+			flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address,
+							 vmf->pmd);
+	}
+}
+/*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
  * RISC architectures). The early dirtying is also good on the i386.
@@ -6196,23 +6253,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	}
 	entry = pte_mkyoung(entry);
 	if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry,
-				vmf->flags & FAULT_FLAG_WRITE)) {
+				vmf->flags & FAULT_FLAG_WRITE))
 		update_mmu_cache_range(vmf, vmf->vma, vmf->address,
 				vmf->pte, 1);
-	} else {
-		/* Skip spurious TLB flush for retried page fault */
-		if (vmf->flags & FAULT_FLAG_TRIED)
-			goto unlock;
-		/*
-		 * This is needed only for protection faults but the arch code
-		 * is not yet telling us if this is a protection fault or not.
-		 * This still avoids useless tlb flushes for .text page faults
-		 * with threads.
-		 */
-		if (vmf->flags & FAULT_FLAG_WRITE)
-			flush_tlb_fix_spurious_fault(vmf->vma, vmf->address,
-						     vmf->pte);
-	}
+	else
+		fix_spurious_fault(vmf, PGTABLE_LEVEL_PTE);
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
@@ -6309,7 +6354,10 @@ retry_pud:
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
 			} else {
-				huge_pmd_set_accessed(&vmf);
+				vmf.ptl = pmd_lock(mm, vmf.pmd);
+				if (!huge_pmd_set_accessed(&vmf))
+					fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD);
+				spin_unlock(vmf.ptl);
 				return 0;
 			}
 		}
