
Commit 2820b0f

rikvanriel authored and akpm00 committed
hugetlbfs: close race between MADV_DONTNEED and page fault
Malloc libraries, like jemalloc and tcmalloc, take decisions on when to call
madvise independently from the code in the main application.

This sometimes results in the application page faulting on an address, right
after the malloc library has shot down the backing memory with MADV_DONTNEED.

Usually this is harmless, because we always have some 4kB pages sitting around
to satisfy a page fault. However, with hugetlbfs, systems often allocate only
the exact number of huge pages that the application wants.

Due to TLB batching, hugetlbfs MADV_DONTNEED will free pages outside of any
lock taken on the page fault path, which can open up the following race
condition:

       CPU 1                            CPU 2

       MADV_DONTNEED
       unmap page
       shoot down TLB entry
                                        page fault
                                        fail to allocate a huge page
                                        killed with SIGBUS
       free page

Fix that race by pulling the locking from __unmap_hugepage_range_final into
helper functions called from zap_page_range_single. This ensures page faults
stay locked out of the MADV_DONTNEED VMA until the huge pages have actually
been freed.

Link: https://lkml.kernel.org/r/[email protected]
Fixes: 04ada09 ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent bf49169 commit 2820b0f

3 files changed: 63 additions, 19 deletions
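For illustration only (not part of the patch; the mapping size is a
placeholder for a single 2MB huge page): a minimal userspace sketch of the
allocate/MADV_DONTNEED/re-fault pattern the commit message describes. On a
system provisioned with exactly the huge pages it uses, a second thread
faulting on the range between the TLB shootdown and the page actually
returning to the pool is what used to end in SIGBUS.

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

#define LEN (2UL * 1024 * 1024)	/* one 2MB huge page */

int main(void)
{
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");	/* no huge pages configured */
		return 1;
	}

	p[0] = 1;			/* fault the huge page in */
	madvise(p, LEN, MADV_DONTNEED);	/* free it back to the pool */

	/*
	 * A concurrent fault at this point could lose the race described
	 * above: it ran before the freed page returned to the pool, found
	 * the pool empty, and the task was killed with SIGBUS.
	 */
	p[0] = 2;			/* fault the huge page back in */
	return 0;
}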

include/linux/hugetlb.h

Lines changed: 33 additions & 2 deletions
@@ -139,7 +139,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
 void unmap_hugepage_range(struct vm_area_struct *,
 			  unsigned long, unsigned long, struct page *,
 			  zap_flags_t);
-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+void __unmap_hugepage_range(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
 			  struct page *ref_page, zap_flags_t zap_flags);
@@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);

+extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
+				unsigned long *begin, unsigned long *end);
+extern void __hugetlb_zap_end(struct vm_area_struct *vma,
+			      struct zap_details *details);
+
+static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
+				     unsigned long *start, unsigned long *end)
+{
+	if (is_vm_hugetlb_page(vma))
+		__hugetlb_zap_begin(vma, start, end);
+}
+
+static inline void hugetlb_zap_end(struct vm_area_struct *vma,
+				   struct zap_details *details)
+{
+	if (is_vm_hugetlb_page(vma))
+		__hugetlb_zap_end(vma, details);
+}
+
 void hugetlb_vma_lock_read(struct vm_area_struct *vma);
 void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
 void hugetlb_vma_lock_write(struct vm_area_struct *vma);
@@ -297,6 +316,18 @@ static inline void adjust_range_if_pmd_sharing_possible(
 {
 }

+static inline void hugetlb_zap_begin(
+				struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
+
+static inline void hugetlb_zap_end(
+				struct vm_area_struct *vma,
+				struct zap_details *details)
+{
+}
+
 static inline struct page *hugetlb_follow_page_mask(
 	struct vm_area_struct *vma, unsigned long address, unsigned int flags,
 	unsigned int *page_mask)
@@ -442,7 +473,7 @@ static inline long hugetlb_change_protection(
 	return 0;
 }

-static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
 			unsigned long end, struct page *ref_page,
 			zap_flags_t zap_flags)
mm/hugetlb.c

Lines changed: 22 additions & 12 deletions
@@ -5306,9 +5306,9 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 	return len + old_addr - old_end;
 }

-static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end,
-				   struct page *ref_page, zap_flags_t zap_flags)
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			    unsigned long start, unsigned long end,
+			    struct page *ref_page, zap_flags_t zap_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -5437,16 +5437,25 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 	tlb_flush_mmu_tlbonly(tlb);
 }

-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
-			  struct vm_area_struct *vma, unsigned long start,
-			  unsigned long end, struct page *ref_page,
-			  zap_flags_t zap_flags)
+void __hugetlb_zap_begin(struct vm_area_struct *vma,
+			 unsigned long *start, unsigned long *end)
 {
+	if (!vma->vm_file)	/* hugetlbfs_file_mmap error */
+		return;
+
+	adjust_range_if_pmd_sharing_possible(vma, start, end);
 	hugetlb_vma_lock_write(vma);
-	i_mmap_lock_write(vma->vm_file->f_mapping);
+	if (vma->vm_file)
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+}

-	/* mmu notification performed in caller */
-	__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
+void __hugetlb_zap_end(struct vm_area_struct *vma,
+		       struct zap_details *details)
+{
+	zap_flags_t zap_flags = details ? details->zap_flags : 0;
+
+	if (!vma->vm_file)	/* hugetlbfs_file_mmap error */
+		return;

 	if (zap_flags & ZAP_FLAG_UNMAP) {	/* final unmap */
 		/*
@@ -5459,11 +5468,12 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 		 * someone else.
 		 */
 		__hugetlb_vma_unlock_write_free(vma);
-		i_mmap_unlock_write(vma->vm_file->f_mapping);
 	} else {
-		i_mmap_unlock_write(vma->vm_file->f_mapping);
 		hugetlb_vma_unlock_write(vma);
 	}
+
+	if (vma->vm_file)
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
 }

 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,

mm/memory.c

Lines changed: 8 additions & 5 deletions
@@ -1683,7 +1683,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 		if (vma->vm_file) {
 			zap_flags_t zap_flags = details ?
 					details->zap_flags : 0;
-			__unmap_hugepage_range_final(tlb, vma, start, end,
+			__unmap_hugepage_range(tlb, vma, start, end,
 						     NULL, zap_flags);
 		}
 	} else
@@ -1728,8 +1728,12 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
 				start_addr, end_addr);
 	mmu_notifier_invalidate_range_start(&range);
 	do {
-		unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
+		unsigned long start = start_addr;
+		unsigned long end = end_addr;
+		hugetlb_zap_begin(vma, &start, &end);
+		unmap_single_vma(tlb, vma, start, end, &details,
 				 mm_wr_locked);
+		hugetlb_zap_end(vma, &details);
 	} while ((vma = mas_find(mas, tree_end - 1)) != NULL);
 	mmu_notifier_invalidate_range_end(&range);
 }
@@ -1753,9 +1757,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
 	lru_add_drain();
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
 				address, end);
-	if (is_vm_hugetlb_page(vma))
-		adjust_range_if_pmd_sharing_possible(vma, &range.start,
-						     &range.end);
+	hugetlb_zap_begin(vma, &range.start, &range.end);
 	tlb_gather_mmu(&tlb, vma->vm_mm);
 	update_hiwater_rss(vma->vm_mm);
 	mmu_notifier_invalidate_range_start(&range);
@@ -1766,6 +1768,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
 	unmap_single_vma(&tlb, vma, address, end, details, false);
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb);
+	hugetlb_zap_end(vma, details);
 }

 /**
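To make the before/after ordering concrete, here is a hedged userspace model
of the fix; all names in it are hypothetical, and a pthread rwlock stands in
for the hugetlb VMA lock (which the real fault path takes in read mode via
hugetlb_vma_lock_read). Because the zap path now holds the lock in write mode
across both the unmap and the free, a concurrent fault either sees the page
still mapped or finds the freed page already back in the pool; the empty-pool
window that produced the SIGBUS is gone.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t vma_lock = PTHREAD_RWLOCK_INITIALIZER;
static bool page_mapped = true;
static int free_pool;		/* exact provisioning: no spare pages */

/* MADV_DONTNEED path, i.e. zap_page_range_single() */
static void zap_range(void)
{
	pthread_rwlock_wrlock(&vma_lock);	/* hugetlb_zap_begin() */
	page_mapped = false;			/* unmap, TLB shootdown */
	free_pool++;				/* pages actually freed */
	pthread_rwlock_unlock(&vma_lock);	/* hugetlb_zap_end() */
	/*
	 * Before the fix, the lock was dropped between the unmap and the
	 * free, leaving a window with page_mapped == false and
	 * free_pool == 0.
	 */
}

/* hugetlb page fault path */
static void *fault_in(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&vma_lock);
	if (!page_mapped) {
		if (free_pool > 0) {
			free_pool--;		/* allocate a huge page */
			page_mapped = true;
		} else {
			puts("SIGBUS");		/* the racy outcome */
		}
	}
	pthread_rwlock_unlock(&vma_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, fault_in, NULL);
	zap_range();
	pthread_join(t, NULL);
	printf("mapped=%d pool=%d\n", page_mapped, free_pool);
	return 0;
}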
