Skip to content

Commit 3b1d58a

Browse files
Dave ChinnerAl Viro
Dave Chinner
authored and
Al Viro
committed
list_lru: per-node list infrastructure
Now that we have an LRU list API, we can start to enhance the implementation. This splits the single LRU list into per-node lists and locks to enhance scalability. Items are placed on lists according to the node the memory belongs to. To make scanning the lists efficient, also track whether the per-node lists have entries in them in an active nodemask. Note: We use a fixed-size array for the node LRU, this struct can be very big if MAX_NUMNODES is big. If this becomes a problem this is fixable by turning this into a pointer and dynamically allocating this to nr_node_ids. This quantity is firmware-provided, and still would provide room for all nodes at the cost of a pointer lookup and an extra allocation. Because that allocation will most likely come from a different slab cache than the main structure holding this structure, we may very well fail. [[email protected]: fix warnings, added note about node lru] Signed-off-by: Dave Chinner <[email protected]> Signed-off-by: Glauber Costa <[email protected]> Reviewed-by: Greg Thelen <[email protected]> Acked-by: Mel Gorman <[email protected]> Cc: "Theodore Ts'o" <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Al Viro <[email protected]> Cc: Artem Bityutskiy <[email protected]> Cc: Arve Hjønnevåg <[email protected]> Cc: Carlos Maiolino <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Chuck Lever <[email protected]> Cc: Daniel Vetter <[email protected]> Cc: David Rientjes <[email protected]> Cc: Gleb Natapov <[email protected]> Cc: Greg Thelen <[email protected]> Cc: J. Bruce Fields <[email protected]> Cc: Jan Kara <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: John Stultz <[email protected]> Cc: KAMEZAWA Hiroyuki <[email protected]> Cc: Kent Overstreet <[email protected]> Cc: Kirill A.
Shutemov <[email protected]> Cc: Marcelo Tosatti <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Steven Whitehouse <[email protected]> Cc: Thomas Hellstrom <[email protected]> Cc: Trond Myklebust <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent f604156 commit 3b1d58a

File tree

2 files changed

+129
-40
lines changed

2 files changed

+129
-40
lines changed

include/linux/list_lru.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define _LRU_LIST_H
99

1010
#include <linux/list.h>
11+
#include <linux/nodemask.h>
1112

1213
/* list_lru_walk_cb has to always return one of those */
1314
enum lru_status {
@@ -18,11 +19,26 @@ enum lru_status {
1819
internally, but has to return locked. */
1920
};
2021

21-
struct list_lru {
22+
struct list_lru_node {
2223
spinlock_t lock;
2324
struct list_head list;
2425
/* kept as signed so we can catch imbalance bugs */
2526
long nr_items;
27+
} ____cacheline_aligned_in_smp;
28+
29+
struct list_lru {
30+
/*
31+
* Because we use a fixed-size array, this struct can be very big if
32+
* MAX_NUMNODES is big. If this becomes a problem this is fixable by
33+
* turning this into a pointer and dynamically allocating this to
34+
* nr_node_ids. This quantity is firwmare-provided, and still would
35+
* provide room for all nodes at the cost of a pointer lookup and an
36+
* extra allocation. Because that allocation will most likely come from
37+
* a different slab cache than the main structure holding this
38+
* structure, we may very well fail.
39+
*/
40+
struct list_lru_node node[MAX_NUMNODES];
41+
nodemask_t active_nodes;
2642
};
2743

2844
int list_lru_init(struct list_lru *lru);
@@ -66,10 +82,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item);
6682
* guarantee that the list is not updated while the count is being computed.
6783
* Callers that want such a guarantee need to provide an outer lock.
6884
*/
69-
static inline unsigned long list_lru_count(struct list_lru *lru)
70-
{
71-
return lru->nr_items;
72-
}
85+
unsigned long list_lru_count(struct list_lru *lru);
7386

7487
typedef enum lru_status
7588
(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);

mm/list_lru.c

Lines changed: 111 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,41 +6,73 @@
66
*/
77
#include <linux/kernel.h>
88
#include <linux/module.h>
9+
#include <linux/mm.h>
910
#include <linux/list_lru.h>
1011

1112
bool list_lru_add(struct list_lru *lru, struct list_head *item)
1213
{
13-
spin_lock(&lru->lock);
14+
int nid = page_to_nid(virt_to_page(item));
15+
struct list_lru_node *nlru = &lru->node[nid];
16+
17+
spin_lock(&nlru->lock);
18+
WARN_ON_ONCE(nlru->nr_items < 0);
1419
if (list_empty(item)) {
15-
list_add_tail(item, &lru->list);
16-
lru->nr_items++;
17-
spin_unlock(&lru->lock);
20+
list_add_tail(item, &nlru->list);
21+
if (nlru->nr_items++ == 0)
22+
node_set(nid, lru->active_nodes);
23+
spin_unlock(&nlru->lock);
1824
return true;
1925
}
20-
spin_unlock(&lru->lock);
26+
spin_unlock(&nlru->lock);
2127
return false;
2228
}
2329
EXPORT_SYMBOL_GPL(list_lru_add);
2430

2531
bool list_lru_del(struct list_lru *lru, struct list_head *item)
2632
{
27-
spin_lock(&lru->lock);
33+
int nid = page_to_nid(virt_to_page(item));
34+
struct list_lru_node *nlru = &lru->node[nid];
35+
36+
spin_lock(&nlru->lock);
2837
if (!list_empty(item)) {
2938
list_del_init(item);
30-
lru->nr_items--;
31-
spin_unlock(&lru->lock);
39+
if (--nlru->nr_items == 0)
40+
node_clear(nid, lru->active_nodes);
41+
WARN_ON_ONCE(nlru->nr_items < 0);
42+
spin_unlock(&nlru->lock);
3243
return true;
3344
}
34-
spin_unlock(&lru->lock);
45+
spin_unlock(&nlru->lock);
3546
return false;
3647
}
3748
EXPORT_SYMBOL_GPL(list_lru_del);
3849

39-
unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
40-
void *cb_arg, unsigned long nr_to_walk)
50+
unsigned long list_lru_count(struct list_lru *lru)
4151
{
52+
unsigned long count = 0;
53+
int nid;
54+
55+
for_each_node_mask(nid, lru->active_nodes) {
56+
struct list_lru_node *nlru = &lru->node[nid];
57+
58+
spin_lock(&nlru->lock);
59+
WARN_ON_ONCE(nlru->nr_items < 0);
60+
count += nlru->nr_items;
61+
spin_unlock(&nlru->lock);
62+
}
63+
64+
return count;
65+
}
66+
EXPORT_SYMBOL_GPL(list_lru_count);
67+
68+
static unsigned long
69+
list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
70+
void *cb_arg, unsigned long *nr_to_walk)
71+
{
72+
73+
struct list_lru_node *nlru = &lru->node[nid];
4274
struct list_head *item, *n;
43-
unsigned long removed = 0;
75+
unsigned long isolated = 0;
4476
/*
4577
* If we don't keep state of at which pass we are, we can loop at
4678
* LRU_RETRY, since we have no guarantees that the caller will be able
@@ -50,18 +82,20 @@ unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
5082
*/
5183
bool first_pass = true;
5284

53-
spin_lock(&lru->lock);
85+
spin_lock(&nlru->lock);
5486
restart:
55-
list_for_each_safe(item, n, &lru->list) {
87+
list_for_each_safe(item, n, &nlru->list) {
5688
enum lru_status ret;
57-
ret = isolate(item, &lru->lock, cb_arg);
89+
ret = isolate(item, &nlru->lock, cb_arg);
5890
switch (ret) {
5991
case LRU_REMOVED:
60-
lru->nr_items--;
61-
removed++;
92+
if (--nlru->nr_items == 0)
93+
node_clear(nid, lru->active_nodes);
94+
WARN_ON_ONCE(nlru->nr_items < 0);
95+
isolated++;
6296
break;
6397
case LRU_ROTATE:
64-
list_move_tail(item, &lru->list);
98+
list_move_tail(item, &nlru->list);
6599
break;
66100
case LRU_SKIP:
67101
break;
@@ -76,42 +110,84 @@ unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
76110
BUG();
77111
}
78112

79-
if (nr_to_walk-- == 0)
113+
if ((*nr_to_walk)-- == 0)
80114
break;
81115

82116
}
83-
spin_unlock(&lru->lock);
84-
return removed;
117+
118+
spin_unlock(&nlru->lock);
119+
return isolated;
120+
}
121+
EXPORT_SYMBOL_GPL(list_lru_walk_node);
122+
123+
/*
 * Walk every active per-node list, applying @isolate to items until
 * @nr_to_walk items have been visited. Returns the number of items the
 * callback reported as removed (LRU_REMOVED).
 */
unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
			    void *cb_arg, unsigned long nr_to_walk)
{
	unsigned long isolated = 0;
	int nid;

	for_each_node_mask(nid, lru->active_nodes) {
		isolated += list_lru_walk_node(lru, nid, isolate,
					       cb_arg, &nr_to_walk);
		/* budget exhausted; nr_to_walk is unsigned, so test for 0 */
		if (nr_to_walk == 0)
			break;
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk);
87138

88-
unsigned long list_lru_dispose_all(struct list_lru *lru,
89-
list_lru_dispose_cb dispose)
139+
static unsigned long list_lru_dispose_all_node(struct list_lru *lru, int nid,
140+
list_lru_dispose_cb dispose)
90141
{
91-
unsigned long disposed = 0;
142+
struct list_lru_node *nlru = &lru->node[nid];
92143
LIST_HEAD(dispose_list);
144+
unsigned long disposed = 0;
93145

94-
spin_lock(&lru->lock);
95-
while (!list_empty(&lru->list)) {
96-
list_splice_init(&lru->list, &dispose_list);
97-
disposed += lru->nr_items;
98-
lru->nr_items = 0;
99-
spin_unlock(&lru->lock);
146+
spin_lock(&nlru->lock);
147+
while (!list_empty(&nlru->list)) {
148+
list_splice_init(&nlru->list, &dispose_list);
149+
disposed += nlru->nr_items;
150+
nlru->nr_items = 0;
151+
node_clear(nid, lru->active_nodes);
152+
spin_unlock(&nlru->lock);
100153

101154
dispose(&dispose_list);
102155

103-
spin_lock(&lru->lock);
156+
spin_lock(&nlru->lock);
104157
}
105-
spin_unlock(&lru->lock);
158+
spin_unlock(&nlru->lock);
106159
return disposed;
107160
}
108161

162+
unsigned long list_lru_dispose_all(struct list_lru *lru,
163+
list_lru_dispose_cb dispose)
164+
{
165+
unsigned long disposed;
166+
unsigned long total = 0;
167+
int nid;
168+
169+
do {
170+
disposed = 0;
171+
for_each_node_mask(nid, lru->active_nodes) {
172+
disposed += list_lru_dispose_all_node(lru, nid,
173+
dispose);
174+
}
175+
total += disposed;
176+
} while (disposed != 0);
177+
178+
return total;
179+
}
180+
109181
int list_lru_init(struct list_lru *lru)
110182
{
111-
spin_lock_init(&lru->lock);
112-
INIT_LIST_HEAD(&lru->list);
113-
lru->nr_items = 0;
183+
int i;
114184

185+
nodes_clear(lru->active_nodes);
186+
for (i = 0; i < MAX_NUMNODES; i++) {
187+
spin_lock_init(&lru->node[i].lock);
188+
INIT_LIST_HEAD(&lru->node[i].list);
189+
lru->node[i].nr_items = 0;
190+
}
115191
return 0;
116192
}
117193
EXPORT_SYMBOL_GPL(list_lru_init);

0 commit comments

Comments
 (0)