
Commit 6606c3e

Zachary Amsden authored and Linus Torvalds committed
[PATCH] paravirt: lazy mmu mode hooks.patch

Implement lazy MMU update hooks which are SMP safe for both direct and
shadow page tables.  The idea is that PTE updates and page invalidations
while in lazy mode can be batched into a single hypercall.  We use this
in VMI for shadow page table synchronization, and it is a win.  It also
can be used by PPC and for direct page tables on Xen.

For SMP, the enter / leave must happen under protection of the page table
locks for page tables which are being modified.  This is because otherwise,
you end up with stale state in the batched hypercall, which other CPUs can
race ahead of.  Doing this under the protection of the locks guarantees the
synchronization is correct, and also means that spurious faults which are
generated during this window by remote CPUs are properly handled, as the
page fault handler must re-check the PTE under protection of the same lock.

Signed-off-by: Zachary Amsden <[email protected]>
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 9888a1c commit 6606c3e

4 files changed, 32 insertions(+), 0 deletions(-)
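The commit message describes batching PTE updates and invalidations into a single hypercall while lazy mode is active. As a rough illustration only (not part of this commit), an architecture backend could override the generic no-op hooks along these lines; the per-CPU queue layout, queue_pte_update(), and the hv_update_ptes() hypercall wrapper are invented names standing in for a real hypervisor interface:

/*
 * Illustrative sketch only -- not part of this commit.  A hypothetical
 * backend (e.g. in an arch's pgtable.h) batching PTE updates into one
 * hypercall per lazy window.
 */
#include <linux/percpu.h>

void hv_update_ptes(unsigned long *addrs, pte_t *vals, int nr);	/* hypothetical hypercall wrapper */

#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define LAZY_MMU_BATCH	64

struct lazy_mmu_state {
	int		active;			/* inside an enter/leave window */
	int		nr;			/* updates queued so far */
	unsigned long	addr[LAZY_MMU_BATCH];
	pte_t		val[LAZY_MMU_BATCH];
};

static DEFINE_PER_CPU(struct lazy_mmu_state, lazy_mmu);

/* Called from the arch's set_pte_at() implementation. */
static void queue_pte_update(unsigned long addr, pte_t pteval)
{
	struct lazy_mmu_state *st = &__get_cpu_var(lazy_mmu);

	st->addr[st->nr] = addr;
	st->val[st->nr] = pteval;
	st->nr++;
	/* Flush at once when not batching, or when the queue is full. */
	if (!st->active || st->nr == LAZY_MMU_BATCH) {
		hv_update_ptes(st->addr, st->val, st->nr);	/* one hypercall */
		st->nr = 0;
	}
}

static inline void arch_enter_lazy_mmu_mode(void)
{
	__get_cpu_var(lazy_mmu).active = 1;
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	struct lazy_mmu_state *st = &__get_cpu_var(lazy_mmu);

	st->active = 0;
	if (st->nr) {
		hv_update_ptes(st->addr, st->val, st->nr);
		st->nr = 0;
	}
}

Because the call sites in the hunks below enter and leave the mode with the page table lock held, preemption is disabled across the window, so per-CPU state like this needs no additional locking.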

include/asm-generic/pgtable.h

Lines changed: 20 additions & 0 deletions
@@ -170,6 +170,26 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+/*
+ * A facility to provide lazy MMU batching.  This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued.  Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window.  Note that using this
+ * interface requires that read hazards be removed from the code.  A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads though
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date.  This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified.  In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode()	do {} while (0)
+#define arch_leave_lazy_mmu_mode()	do {} while (0)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
mm/memory.c

Lines changed: 8 additions & 0 deletions
@@ -506,6 +506,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+	arch_enter_lazy_mmu_mode();
 
 	do {
 		/*
@@ -527,6 +528,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
+	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
 	add_mm_rss(dst_mm, rss[0], rss[1]);
@@ -628,6 +630,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	int anon_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
@@ -694,6 +697,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
 	return addr;
@@ -1109,6 +1113,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
+	arch_enter_lazy_mmu_mode();
 	do {
 		struct page *page = ZERO_PAGE(addr);
 		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
@@ -1118,6 +1123,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, zero_pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 	return 0;
 }
@@ -1275,11 +1281,13 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
+	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 	return 0;
 }

mm/mprotect.c

Lines changed: 2 additions & 0 deletions
@@ -34,6 +34,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	spinlock_t *ptl;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
@@ -70,6 +71,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		}
 
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 }
 

mm/mremap.c

Lines changed: 2 additions & 0 deletions
@@ -98,6 +98,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 				   new_pte++, new_addr += PAGE_SIZE) {
@@ -109,6 +110,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
+	arch_leave_lazy_mmu_mode();
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap_nested(new_pte - 1);
