
Commit 186525b

Author: Ingo Molnar (committed)
mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions
- Untangle the somewhat incestuous way of how VMALLOC_START is used
  all across the kernel, but is, on x86, defined deep inside one of
  the lowest level page table headers. It doesn't help that vmalloc.h
  only includes a single asm header:

     #include <asm/page.h>           /* pgprot_t */

  So there was no existing cross-arch way to decouple address layout
  definitions from page.h details. I used this:

   #ifndef VMALLOC_START
   # include <asm/vmalloc.h>
   #endif

  This way every architecture that wants to simplify page.h can do so.

- Also on x86 we had a couple of LDT related inline functions that used
  the late-stage address space layout positions - but these could be
  uninlined without real trouble - the end result is cleaner this way as
  well.

Signed-off-by: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 1f059df commit 186525b
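To illustrate the decoupling pattern described in the commit message: a generic header can keep including only <asm/page.h> and pull in the per-architecture layout definitions when they are not already visible. A minimal sketch (the header name and surrounding content are hypothetical; only the #ifndef VMALLOC_START guard and the <asm/page.h> include come from the commit message):

	/* include/linux/example_header.h -- illustrative sketch only */
	#include <asm/page.h>		/* pgprot_t */

	/*
	 * Architectures that moved their address space layout definitions
	 * out of <asm/page.h> can provide them via <asm/vmalloc.h> instead:
	 */
	#ifndef VMALLOC_START
	# include <asm/vmalloc.h>
	#endif

The guard means the extra include is only reached on architectures whose page.h no longer exposes VMALLOC_START, so architectures that keep the old layout are unaffected.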

15 files changed: 183 additions, 154 deletions


arch/x86/include/asm/cpu_entry_area.h

Lines changed: 1 addition & 9 deletions
@@ -6,6 +6,7 @@
 #include <linux/percpu-defs.h>
 #include <asm/processor.h>
 #include <asm/intel_ds.h>
+#include <asm/pgtable_areas.h>
 
 #ifdef CONFIG_X86_64
 
@@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
 extern void setup_cpu_entry_areas(void);
 extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
 
-/* Single page reserved for the readonly IDT mapping: */
-#define CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
-#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
-
-#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
-
-#define CPU_ENTRY_AREA_MAP_SIZE		\
-	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
-
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
 
 static inline struct entry_stack *cpu_entry_stack(int cpu)

arch/x86/include/asm/mmu_context.h

Lines changed: 6 additions & 80 deletions
@@ -69,14 +69,6 @@ struct ldt_struct {
 	int		slot;
 };
 
-/* This is a multiple of PAGE_SIZE. */
-#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
-
-static inline void *ldt_slot_va(int slot)
-{
-	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
-}
-
 /*
  * Used for LDT copy/destruction.
  */
@@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
 static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
-static inline void load_mm_ldt(struct mm_struct *mm)
-{
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
-
-	/* READ_ONCE synchronizes with smp_store_release */
-	ldt = READ_ONCE(mm->context.ldt);
-
-	/*
-	 * Any change to mm->context.ldt is followed by an IPI to all
-	 * CPUs with the mm active. The LDT will not be freed until
-	 * after the IPI is handled by all such CPUs. This means that,
-	 * if the ldt_struct changes before we return, the values we see
-	 * will be safe, and the new values will be loaded before we run
-	 * any user code.
-	 *
-	 * NB: don't try to convert this to use RCU without extreme care.
-	 * We would still need IRQs off, because we don't want to change
-	 * the local LDT after an IPI loaded a newer value than the one
-	 * that we can see.
-	 */
-
-	if (unlikely(ldt)) {
-		if (static_cpu_has(X86_FEATURE_PTI)) {
-			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
-				/*
-				 * Whoops -- either the new LDT isn't mapped
-				 * (if slot == -1) or is mapped into a bogus
-				 * slot (if slot > 1).
-				 */
-				clear_LDT();
-				return;
-			}
-
-			/*
-			 * If page table isolation is enabled, ldt->entries
-			 * will not be mapped in the userspace pagetables.
-			 * Tell the CPU to access the LDT through the alias
-			 * at ldt_slot_va(ldt->slot).
-			 */
-			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
-		} else {
-			set_ldt(ldt->entries, ldt->nr_entries);
-		}
-	} else {
-		clear_LDT();
-	}
+extern void load_mm_ldt(struct mm_struct *mm);
+extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
 #else
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
 	clear_LDT();
-#endif
 }
-
 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-	/*
-	 * Load the LDT if either the old or new mm had an LDT.
-	 *
-	 * An mm will never go from having an LDT to not having an LDT. Two
-	 * mms never share an LDT, so we don't gain anything by checking to
-	 * see whether the LDT changed. There's also no guarantee that
-	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
-	 * then prev->context.ldt will also be non-NULL.
-	 *
-	 * If we really cared, we could optimize the case where prev == next
-	 * and we're exiting lazy mode. Most of the time, if this happens,
-	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
-	 * used by legacy code and emulators where we don't need this level of
-	 * performance.
-	 *
-	 * This uses | instead of || because it generates better code.
-	 */
-	if (unlikely((unsigned long)prev->context.ldt |
-		     (unsigned long)next->context.ldt))
-		load_mm_ldt(next);
-#endif
-
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
+#endif
 
-void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
 /*
  * Init a new mm. Used on mm copies, like at fork()
arch/x86/include/asm/pgtable_32_areas.h

Lines changed: 53 additions & 0 deletions

@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_PGTABLE_32_AREAS_H
+#define _ASM_X86_PGTABLE_32_AREAS_H
+
+#include <asm/cpu_entry_area.h>
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET	(8 * 1024 * 1024)
+
+#ifndef __ASSEMBLY__
+extern bool __vmalloc_start_set; /* set once high_memory is set */
+#endif
+
+#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+
+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))
+
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE	\
+	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+#define LDT_BASE_ADDR		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+
+#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
+
+#define PKMAP_BASE		\
+	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
+
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
+#else
+# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
+#endif
+
+#define MODULES_VADDR	VMALLOC_START
+#define MODULES_END	VMALLOC_END
+#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
+
+#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
+#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
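As a sanity check of how these macros stack the 32-bit areas downward from the fixmap, here is a standalone user-space sketch that mirrors the arithmetic above. PAGE_SIZE, PMD_SIZE, FIXADDR_TOT_START and CPU_ENTRY_AREA_PAGES below are illustrative assumptions, not the kernel's configuration-dependent values:

	/* Illustrative sketch only: mirrors the pgtable_32_areas.h arithmetic. */
	#include <stdio.h>

	#define PAGE_SIZE		0x1000UL	/* 4 KiB */
	#define PMD_SIZE		0x200000UL	/* 2 MiB, PAE-style; assumption */
	#define PMD_MASK		(~(PMD_SIZE - 1))
	#define FIXADDR_TOT_START	0xff800000UL	/* hypothetical value */
	#define CPU_ENTRY_AREA_PAGES	(4 * 39)	/* hypothetical: 4 CPUs, 39 pages each */

	int main(void)
	{
		unsigned long cea_base    = (FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK;
		unsigned long ldt_base    = (cea_base - PAGE_SIZE) & PMD_MASK;
		unsigned long pkmap_base  = (ldt_base - PAGE_SIZE) & PMD_MASK;
		unsigned long vmalloc_end = pkmap_base - 2 * PAGE_SIZE;	/* CONFIG_HIGHMEM case */

		printf("CPU_ENTRY_AREA_BASE = %#lx\n", cea_base);	/* 0xff600000 */
		printf("LDT_BASE_ADDR       = %#lx\n", ldt_base);	/* 0xff400000 */
		printf("PKMAP_BASE          = %#lx\n", pkmap_base);	/* 0xff200000 */
		printf("VMALLOC_END         = %#lx\n", vmalloc_end);	/* 0xff1fe000 */
		return 0;
	}

Each area lands at a PMD-aligned address just below the previous one; in this example LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) ends exactly where the cpu-entry area begins.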
arch/x86/include/asm/pgtable_32_types.h

Lines changed: 3 additions & 54 deletions

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PGTABLE_32_DEFS_H
-#define _ASM_X86_PGTABLE_32_DEFS_H
+#ifndef _ASM_X86_PGTABLE_32_TYPES_H
+#define _ASM_X86_PGTABLE_32_TYPES_H
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,55 +20,4 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- */
-#define VMALLOC_OFFSET	(8 * 1024 * 1024)
-
-#ifndef __ASSEMBLY__
-extern bool __vmalloc_start_set; /* set once high_memory is set */
-#endif
-
-#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
-#ifdef CONFIG_X86_PAE
-#define LAST_PKMAP 512
-#else
-#define LAST_PKMAP 1024
-#endif
-
-/*
- * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
- * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
- * to avoid include recursion hell.
- */
-#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 43)
-
-/* The +1 is for the readonly IDT page: */
-#define CPU_ENTRY_AREA_BASE	\
-	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
-
-#define LDT_BASE_ADDR		\
-	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
-
-#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)
-
-#define PKMAP_BASE		\
-	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)
-
-#ifdef CONFIG_HIGHMEM
-# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
-#else
-# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
-#endif
-
-#define MODULES_VADDR	VMALLOC_START
-#define MODULES_END	VMALLOC_END
-#define MODULES_LEN	(MODULES_VADDR - MODULES_END)
-
-#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
-
-#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
+#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
arch/x86/include/asm/pgtable_areas.h

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+#ifndef _ASM_X86_PGTABLE_AREAS_H
+#define _ASM_X86_PGTABLE_AREAS_H
+
+#ifdef CONFIG_X86_32
+# include <asm/pgtable_32_areas.h>
+#endif
+
+/* Single page reserved for the readonly IDT mapping: */
+#define CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+#endif /* _ASM_X86_PGTABLE_AREAS_H */
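Since CPU_ENTRY_AREA_RO_IDT equals CPU_ENTRY_AREA_BASE and CPU_ENTRY_AREA_PER_CPU sits one page above it, the map size above reduces to:

	CPU_ENTRY_AREA_MAP_SIZE = PAGE_SIZE + CPU_ENTRY_AREA_ARRAY_SIZE

that is, one readonly IDT page plus the per-CPU entry-area array (CPU_ENTRY_AREA_ARRAY_SIZE is defined in cpu_entry_area.h, outside this diff).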

arch/x86/include/asm/vmalloc.h

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,6 @@
 #ifndef _ASM_X86_VMALLOC_H
 #define _ASM_X86_VMALLOC_H
 
+#include <asm/pgtable_areas.h>
+
 #endif /* _ASM_X86_VMALLOC_H */

arch/x86/kernel/ldt.c

Lines changed: 83 additions & 0 deletions
@@ -28,6 +28,89 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
+#include <asm/pgtable_areas.h>
+
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+}
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+	struct ldt_struct *ldt;
+
+	/* READ_ONCE synchronizes with smp_store_release */
+	ldt = READ_ONCE(mm->context.ldt);
+
+	/*
+	 * Any change to mm->context.ldt is followed by an IPI to all
+	 * CPUs with the mm active. The LDT will not be freed until
+	 * after the IPI is handled by all such CPUs. This means that,
+	 * if the ldt_struct changes before we return, the values we see
+	 * will be safe, and the new values will be loaded before we run
+	 * any user code.
+	 *
+	 * NB: don't try to convert this to use RCU without extreme care.
+	 * We would still need IRQs off, because we don't want to change
+	 * the local LDT after an IPI loaded a newer value than the one
+	 * that we can see.
+	 */
+
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
+		clear_LDT();
+	}
+}
+
+void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+	/*
+	 * Load the LDT if either the old or new mm had an LDT.
+	 *
+	 * An mm will never go from having an LDT to not having an LDT. Two
+	 * mms never share an LDT, so we don't gain anything by checking to
+	 * see whether the LDT changed. There's also no guarantee that
+	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+	 * then prev->context.ldt will also be non-NULL.
+	 *
+	 * If we really cared, we could optimize the case where prev == next
+	 * and we're exiting lazy mode. Most of the time, if this happens,
+	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
+	 * used by legacy code and emulators where we don't need this level of
+	 * performance.
+	 *
+	 * This uses | instead of || because it generates better code.
+	 */
+	if (unlikely((unsigned long)prev->context.ldt |
+		     (unsigned long)next->context.ldt))
+		load_mm_ldt(next);
+
+	DEBUG_LOCKS_WARN_ON(preemptible());
+}
 
 static void refresh_ldt_segments(void)
 {
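For a sense of the addresses ldt_slot_va() produces: assuming the usual x86 values of LDT_ENTRIES (8192) and LDT_ENTRY_SIZE (8 bytes), which are taken here as assumptions rather than from this diff, LDT_SLOT_STRIDE works out to 64 KiB, so slot 0 aliases LDT_BASE_ADDR and slot 1 aliases LDT_BASE_ADDR + 64 KiB. A standalone sketch of the same computation (LDT_BASE_ADDR here is a hypothetical value for illustration):

	/* Illustrative sketch only: mirrors ldt_slot_va() with assumed constants. */
	#include <stdio.h>

	#define LDT_ENTRIES	8192		/* assumption: usual x86 value */
	#define LDT_ENTRY_SIZE	8		/* assumption: descriptor size in bytes */
	#define LDT_SLOT_STRIDE	(LDT_ENTRIES * LDT_ENTRY_SIZE)
	#define LDT_BASE_ADDR	0xff400000UL	/* hypothetical base address */

	static void *ldt_slot_va(int slot)
	{
		return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
	}

	int main(void)
	{
		printf("stride: %#x\n", LDT_SLOT_STRIDE);	/* 0x10000 = 64 KiB */
		printf("slot 0: %p\n", ldt_slot_va(0));
		printf("slot 1: %p\n", ldt_slot_va(1));
		return 0;
	}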

arch/x86/kernel/setup.c

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@
 #include <asm/proto.h>
 #include <asm/unwind.h>
 #include <asm/vsyscall.h>
+#include <linux/vmalloc.h>
 
 /*
  * max_low_pfn_mapped: highest directly mapped pfn < 4 GB

arch/x86/mm/fault.c

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@
 #include <asm/efi.h>			/* efi_recover_from_page_fault()*/
 #include <asm/desc.h>			/* store_idt(), ... */
 #include <asm/cpu_entry_area.h>		/* exception stack */
+#include <asm/pgtable_areas.h>		/* VMALLOC_START, ... */
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>

arch/x86/mm/init_32.c

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@
 #include <asm/page_types.h>
 #include <asm/cpu_entry_area.h>
 #include <asm/init.h>
+#include <asm/pgtable_areas.h>
 
 #include "mm_internal.h"
 