
Commit 7675076

rppt authored and akpm00 committed
arch_numa: switch over to numa_memblks
Until now arch_numa was directly translating firmware NUMA information
to memblock. Using numa_memblks as an intermediate step has a few
advantages:

* alignment with the more battle-tested x86 implementation
* availability of NUMA emulation
* maintaining node information for not yet populated memory

Adjust a few places in numa_memblks to compile with 32-bit phys_addr_t,
replace the current functionality related to numa_add_memblk() and
__node_distance() in arch_numa with the implementation based on
numa_memblks, and add the functions required by numa_emulation.

[[email protected]: fix section mismatch]
  Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: PFN_PHYS() translation is unnecessary here]
  Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
Tested-by: Zi Yan <[email protected]> # for x86_64 and arm64
Reviewed-by: Jonathan Cameron <[email protected]>
Tested-by: Jonathan Cameron <[email protected]> [arm64 + CXL via QEMU]
Acked-by: Dan Williams <[email protected]>
Acked-by: David Hildenbrand <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Andreas Larsson <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christophe Leroy <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Huacai Chen <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jiaxun Yang <[email protected]>
Cc: John Paul Adrian Glaubitz <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Palmer Dabbelt <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Rob Herring (Arm) <[email protected]>
Cc: Samuel Holland <[email protected]>
Cc: Thomas Bogendoerfer <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
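As a rough sketch of what the new flow looks like for an architecture using GENERIC_ARCH_NUMA (illustrative only, not code from this commit): the firmware parser keeps calling numa_add_memblk() and numa_set_distance(), which are now the shared numa_memblks implementations, and arch_numa's numa_init() simply hands that parser to numa_memblks_init(). The parser name and memory layout below are made up for the example.

    #include <linux/numa_memblks.h>

    /*
     * Hypothetical firmware parser, standing in for the DT/ACPI parsers an
     * architecture actually registers.  numa_add_memblk() and
     * numa_set_distance() now record into numa_memblks instead of the old
     * arch_numa-private tables.
     */
    static int __init example_firmware_numa_init(void)
    {
            int ret;

            /* made-up layout: node 0 owns the first 4G, node 1 the next 4G */
            ret = numa_add_memblk(0, 0x000000000ULL, 0x100000000ULL);
            if (ret)
                    return ret;
            ret = numa_add_memblk(1, 0x100000000ULL, 0x200000000ULL);
            if (ret)
                    return ret;

            numa_set_distance(0, 1, 20);
            numa_set_distance(1, 0, 20);

            return 0;
    }

    /* arch_numa's numa_init() now wraps the parser in numa_memblks_init(). */
    static int __init example_numa_init(void)
    {
            return numa_memblks_init(example_firmware_numa_init,
                                     /* memblock_force_top_down */ false);
    }

With CONFIG_NUMA_EMU enabled, the "fake=" branch added to numa_parse_early_param() below wires the numa=fake= boot option into numa_emu_cmdline(), which is what makes NUMA emulation usable on these architectures.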
1 parent 7e48867

4 files changed (+76, -151 lines)


drivers/base/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -226,6 +226,7 @@ config GENERIC_ARCH_TOPOLOGY
 
 config GENERIC_ARCH_NUMA
         bool
+        select NUMA_MEMBLKS
         help
           Enable support for generic NUMA implementation. Currently, RISC-V
           and ARM64 use it.

drivers/base/arch_numa.c

Lines changed: 60 additions & 141 deletions
@@ -12,14 +12,12 @@
 #include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/numa_memblks.h>
 
 #include <asm/sections.h>
 
-nodemask_t numa_nodes_parsed __initdata;
 static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
 
-static int numa_distance_cnt;
-static u8 *numa_distance;
 bool numa_off;
 
 static __init int numa_parse_early_param(char *opt)
@@ -28,6 +26,8 @@ static __init int numa_parse_early_param(char *opt)
                 return -EINVAL;
         if (str_has_prefix(opt, "off"))
                 numa_off = true;
+        if (!strncmp(opt, "fake=", 5))
+                return numa_emu_cmdline(opt + 5);
 
         return 0;
 }
@@ -59,6 +59,7 @@ EXPORT_SYMBOL(cpumask_of_node);
 
 #endif
 
+#ifndef CONFIG_NUMA_EMU
 static void numa_update_cpu(unsigned int cpu, bool remove)
 {
         int nid = cpu_to_node(cpu);
@@ -81,6 +82,7 @@ void numa_remove_cpu(unsigned int cpu)
 {
         numa_update_cpu(cpu, true);
 }
+#endif
 
 void numa_clear_node(unsigned int cpu)
 {
@@ -142,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
-int __init early_cpu_to_node(int cpu)
+int early_cpu_to_node(int cpu)
 {
         return cpu_to_node_map[cpu];
 }
@@ -187,30 +189,6 @@ void __init setup_per_cpu_areas(void)
 }
 #endif
 
-/**
- * numa_add_memblk() - Set node id to memblk
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-        int ret;
-
-        ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
-        if (ret < 0) {
-                pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
-                        start, (end - 1), nid);
-                return ret;
-        }
-
-        node_set(nid, numa_nodes_parsed);
-        return ret;
-}
-
 /*
  * Initialize NODE_DATA for a node on the local memory
  */
@@ -226,116 +204,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
         NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 }
 
-/*
- * numa_free_distance
- *
- * The current table is freed.
- */
-void __init numa_free_distance(void)
-{
-        size_t size;
-
-        if (!numa_distance)
-                return;
-
-        size = numa_distance_cnt * numa_distance_cnt *
-                sizeof(numa_distance[0]);
-
-        memblock_free(numa_distance, size);
-        numa_distance_cnt = 0;
-        numa_distance = NULL;
-}
-
-/*
- * Create a new NUMA distance table.
- */
-static int __init numa_alloc_distance(void)
-{
-        size_t size;
-        int i, j;
-
-        size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
-        numa_distance = memblock_alloc(size, PAGE_SIZE);
-        if (WARN_ON(!numa_distance))
-                return -ENOMEM;
-
-        numa_distance_cnt = nr_node_ids;
-
-        /* fill with the default distances */
-        for (i = 0; i < numa_distance_cnt; i++)
-                for (j = 0; j < numa_distance_cnt; j++)
-                        numa_distance[i * numa_distance_cnt + j] = i == j ?
-                                LOCAL_DISTANCE : REMOTE_DISTANCE;
-
-        pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt);
-
-        return 0;
-}
-
-/**
- * numa_set_distance() - Set inter node NUMA distance from node to node.
- * @from: the 'from' node to set distance
- * @to: the 'to' node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance.
- * If distance table doesn't exist, a warning is printed.
- *
- * If @from or @to is higher than the highest known node or lower than zero
- * or @distance doesn't make sense, the call is ignored.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
-        if (!numa_distance) {
-                pr_warn_once("Warning: distance table not allocated yet\n");
-                return;
-        }
-
-        if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
-                        from < 0 || to < 0) {
-                pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
-                             from, to, distance);
-                return;
-        }
-
-        if ((u8)distance != distance ||
-            (from == to && distance != LOCAL_DISTANCE)) {
-                pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
-                             from, to, distance);
-                return;
-        }
-
-        numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-/*
- * Return NUMA distance @from to @to
- */
-int __node_distance(int from, int to)
-{
-        if (from >= numa_distance_cnt || to >= numa_distance_cnt)
-                return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
-        return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
 static int __init numa_register_nodes(void)
 {
         int nid;
-        struct memblock_region *mblk;
-
-        /* Check that valid nid is set to memblks */
-        for_each_mem_region(mblk) {
-                int mblk_nid = memblock_get_region_node(mblk);
-                phys_addr_t start = mblk->base;
-                phys_addr_t end = mblk->base + mblk->size - 1;
-
-                if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
-                        pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n",
-                                mblk_nid, &start, &end);
-                        return -EINVAL;
-                }
-        }
 
         /* Finally register nodes. */
         for_each_node_mask(nid, numa_nodes_parsed) {
@@ -360,11 +231,7 @@ static int __init numa_init(int (*init_func)(void))
         nodes_clear(node_possible_map);
         nodes_clear(node_online_map);
 
-        ret = numa_alloc_distance();
-        if (ret < 0)
-                return ret;
-
-        ret = init_func();
+        ret = numa_memblks_init(init_func, /* memblock_force_top_down */ false);
         if (ret < 0)
                 goto out_free_distance;
 
@@ -382,7 +249,7 @@ static int __init numa_init(int (*init_func)(void))
 
         return 0;
 out_free_distance:
-        numa_free_distance();
+        numa_reset_distance();
         return ret;
 }
 
@@ -412,6 +279,7 @@ static int __init dummy_numa_init(void)
                 pr_err("NUMA init failed\n");
                 return ret;
         }
+        node_set(0, numa_nodes_parsed);
 
         numa_off = true;
         return 0;
@@ -454,3 +322,54 @@ void __init arch_numa_init(void)
 
         numa_init(dummy_numa_init);
 }
+
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys,
+                                        unsigned int nr_emu_nids)
+{
+        int i, j;
+
+        /*
+         * Transform cpu_to_node_map table to use emulated nids by
+         * reverse-mapping phys_nid.  The maps should always exist but fall
+         * back to zero just in case.
+         */
+        for (i = 0; i < ARRAY_SIZE(cpu_to_node_map); i++) {
+                if (cpu_to_node_map[i] == NUMA_NO_NODE)
+                        continue;
+                for (j = 0; j < nr_emu_nids; j++)
+                        if (cpu_to_node_map[i] == emu_nid_to_phys[j])
+                                break;
+                cpu_to_node_map[i] = j < nr_emu_nids ? j : 0;
+        }
+}
+
+u64 __init numa_emu_dma_end(void)
+{
+        return memblock_start_of_DRAM() + SZ_4G;
+}
+
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable)
+{
+        struct cpumask *mask;
+
+        if (node == NUMA_NO_NODE)
+                return;
+
+        mask = node_to_cpumask_map[node];
+        if (!cpumask_available(mask)) {
+                pr_err("node_to_cpumask_map[%i] NULL\n", node);
+                dump_stack();
+                return;
+        }
+
+        if (enable)
+                cpumask_set_cpu(cpu, mask);
+        else
+                cpumask_clear_cpu(cpu, mask);
+
+        pr_debug("%s cpu %d node %d: mask now %*pbl\n",
+                 enable ? "numa_add_cpu" : "numa_remove_cpu",
+                 cpu, node, cpumask_pr_args(mask));
+}
+#endif /* CONFIG_NUMA_EMU */
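
To make the reverse mapping in numa_emu_update_cpu_to_node() concrete, here is a small standalone userspace sketch of the same loop; the emulated-to-physical node split and the CPU assignments are invented for the example.

    #include <stdio.h>

    #define NR_CPUS         4
    #define NUMA_NO_NODE    (-1)

    int main(void)
    {
            /* assumed split: emulated nodes 0,1 carved from physical node 0,
             * emulated nodes 2,3 from physical node 1 */
            int emu_nid_to_phys[] = { 0, 0, 1, 1 };
            unsigned int nr_emu_nids = 4;
            /* cpu_to_node_map still holds physical node ids at this point */
            int cpu_to_node_map[NR_CPUS] = { 0, 0, 1, NUMA_NO_NODE };
            unsigned int i, j;

            for (i = 0; i < NR_CPUS; i++) {
                    if (cpu_to_node_map[i] == NUMA_NO_NODE)
                            continue;
                    /* first emulated node backed by that physical node wins */
                    for (j = 0; j < nr_emu_nids; j++)
                            if (cpu_to_node_map[i] == emu_nid_to_phys[j])
                                    break;
                    cpu_to_node_map[i] = j < nr_emu_nids ? j : 0;
            }

            for (i = 0; i < NR_CPUS; i++)
                    printf("cpu %u -> node %d\n", i, cpu_to_node_map[i]);
            /* cpu 0 -> node 0, cpu 1 -> node 0, cpu 2 -> node 2, cpu 3 -> node -1 */
            return 0;
    }

Note that all CPUs of one physical node collapse onto the first emulated node carved from it, matching the "fall back to zero just in case" comment in the kernel function.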

include/asm-generic/numa.h

Lines changed: 5 additions & 3 deletions
@@ -32,10 +32,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
 
 void __init arch_numa_init(void);
 int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-void __init numa_set_distance(int from, int to, int distance);
-void __init numa_free_distance(void);
 void __init early_map_cpu_to_node(unsigned int cpu, int nid);
-int __init early_cpu_to_node(int cpu);
+int early_cpu_to_node(int cpu);
 void numa_store_cpu_info(unsigned int cpu);
 void numa_add_cpu(unsigned int cpu);
 void numa_remove_cpu(unsigned int cpu);
@@ -51,4 +49,8 @@ static inline int early_cpu_to_node(int cpu) { return 0; }
 
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_NUMA_EMU
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
+#endif
+
 #endif /* __ASM_GENERIC_NUMA_H */

mm/numa_memblks.c

Lines changed: 10 additions & 7 deletions
@@ -405,9 +405,12 @@ static int __init numa_register_meminfo(struct numa_meminfo *mi)
                 unsigned long pfn_align = node_map_pfn_alignment();
 
                 if (pfn_align && pfn_align < PAGES_PER_SECTION) {
-                        pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
-                                PFN_PHYS(pfn_align) >> 20,
-                                PFN_PHYS(PAGES_PER_SECTION) >> 20);
+                        unsigned long node_align_mb = PFN_PHYS(pfn_align) >> 20;
+
+                        unsigned long sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) >> 20;
+
+                        pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n",
+                                node_align_mb, sect_align_mb);
                         return -EINVAL;
                 }
         }
@@ -418,18 +421,18 @@ static int __init numa_register_meminfo(struct numa_meminfo *mi)
 int __init numa_memblks_init(int (*init_func)(void),
                              bool memblock_force_top_down)
 {
+        phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
         int ret;
 
         nodes_clear(numa_nodes_parsed);
         nodes_clear(node_possible_map);
         nodes_clear(node_online_map);
         memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-        WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
-                                  NUMA_NO_NODE));
-        WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
+        WARN_ON(memblock_set_node(0, max_addr, &memblock.memory, NUMA_NO_NODE));
+        WARN_ON(memblock_set_node(0, max_addr, &memblock.reserved,
                                   NUMA_NO_NODE));
         /* In case that parsing SRAT failed. */
-        WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
+        WARN_ON(memblock_clear_hotplug(0, max_addr));
         numa_reset_distance();
 
         ret = init_func();