
Commit a528910

hnaz authored and torvalds committed
mm: thrash detection-based file cache sizing
The VM maintains cached filesystem pages on two types of lists. One list holds the pages recently faulted into the cache, the other list holds pages that have been referenced repeatedly on that first list. The idea is to prefer reclaiming young pages over those that have been shown to benefit from caching in the past. We call the recently used list the "inactive list" and the frequently used list the "active list".

The previous approach to balancing these lists, however, was ultimately not significantly better than a FIFO policy and still thrashed the cache based on eviction speed rather than on actual demand for cache.

This patch solves one half of the problem by decoupling the ability to detect working set changes from the inactive list size. By maintaining a history of recently evicted file pages, it can detect frequently used pages with an arbitrarily small inactive list size, and subsequently apply pressure on the active list based on actual demand for cache, not just overall eviction speed.

Every zone maintains a counter that tracks inactive list aging speed. When a page is evicted, a snapshot of this counter is stored in the now-empty page cache radix tree slot. On refault, the minimum access distance of the page can be assessed to evaluate whether the page should be part of the active list or not.

This fixes the VM's blindness towards working set changes in excess of the inactive list. It is also the foundation for further improving the protection ability and reducing the minimum inactive list size of 50%.

Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
Reviewed-by: Bob Liu <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Luigi Semenzato <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Metin Doslu <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Ozgun Erdogan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Ryan Mallon <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 91b0abe commit a528910
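
As a rough illustration of the mechanism described in the commit message, the following user-space C sketch models the shadow-entry bookkeeping: a per-zone aging counter is snapshotted on eviction, and the distance between that snapshot and the counter at refault time decides whether the refaulting page is activated. The names and the exact activation rule (comparing the refault distance against the size of the active file list) are illustrative assumptions, not the actual mm/workingset.c implementation added by this series.

/*
 * Minimal model of thrash detection via shadow entries.
 * Names and the activation rule are illustrative only.
 */
#include <stdbool.h>

unsigned long inactive_age;   /* bumped on inactive file list aging events */
unsigned long nr_active_file; /* pages currently on the active file list */

/* On eviction: the returned snapshot is what would be stored in the
 * now-empty page cache radix tree slot (the "shadow entry"). */
unsigned long eviction_snapshot(void)
{
	return ++inactive_age;
}

/* On refault: the distance between eviction and refault approximates
 * how much additional cache the page would have needed to stay
 * resident.  If that distance fits within the active list, the page
 * is considered part of the working set and should be activated. */
bool refault_should_activate(unsigned long shadow)
{
	unsigned long refault_distance = inactive_age - shadow;

	return refault_distance <= nr_active_file;
}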

File tree

8 files changed: +331 -23 lines


include/linux/mmzone.h

Lines changed: 5 additions & 0 deletions
@@ -142,6 +142,8 @@ enum zone_stat_item {
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
+	WORKINGSET_REFAULT,
+	WORKINGSET_ACTIVATE,
 	NR_ANON_TRANSPARENT_HUGEPAGES,
 	NR_FREE_CMA_PAGES,
 	NR_VM_ZONE_STAT_ITEMS };
@@ -392,6 +394,9 @@ struct zone {
 	spinlock_t		lru_lock;
 	struct lruvec		lruvec;
 
+	/* Evictions & activations on the inactive file list */
+	atomic_long_t		inactive_age;
+
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */

include/linux/swap.h

Lines changed: 5 additions & 0 deletions
@@ -260,6 +260,11 @@ struct swap_list_t {
 	int next;	/* swapfile to be used next */
 };
 
+/* linux/mm/workingset.c */
+void *workingset_eviction(struct address_space *mapping, struct page *page);
+bool workingset_refault(void *shadow);
+void workingset_activation(struct page *page);
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;

mm/Makefile

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
 			   compaction.o balloon_compaction.o \
-			   interval_tree.o list_lru.o $(mmu-y)
+			   interval_tree.o list_lru.o workingset.o $(mmu-y)
 
 obj-y += init-mm.o

mm/filemap.c

Lines changed: 44 additions & 17 deletions
@@ -469,7 +469,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
 static int page_cache_tree_insert(struct address_space *mapping,
-				  struct page *page)
+				  struct page *page, void **shadowp)
 {
 	void **slot;
 	int error;
@@ -484,6 +484,8 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		radix_tree_replace_slot(slot, page);
 		mapping->nrshadows--;
 		mapping->nrpages++;
+		if (shadowp)
+			*shadowp = p;
 		return 0;
 	}
 	error = radix_tree_insert(&mapping->page_tree, page->index, page);
@@ -492,18 +494,10 @@ static int page_cache_tree_insert(struct address_space *mapping,
 	return error;
 }
 
-/**
- * add_to_page_cache_locked - add a locked page to the pagecache
- * @page: page to add
- * @mapping: the page's address_space
- * @offset: page index
- * @gfp_mask: page allocation mode
- *
- * This function is used to add a page to the pagecache. It must be locked.
- * This function does not add the page to the LRU. The caller must do that.
- */
-int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-		pgoff_t offset, gfp_t gfp_mask)
+static int __add_to_page_cache_locked(struct page *page,
+				      struct address_space *mapping,
+				      pgoff_t offset, gfp_t gfp_mask,
+				      void **shadowp)
 {
 	int error;
 
@@ -526,7 +520,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	page->index = offset;
 
 	spin_lock_irq(&mapping->tree_lock);
-	error = page_cache_tree_insert(mapping, page);
+	error = page_cache_tree_insert(mapping, page, shadowp);
 	radix_tree_preload_end();
 	if (unlikely(error))
 		goto err_insert;
@@ -542,16 +536,49 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	page_cache_release(page);
 	return error;
 }
+
+/**
+ * add_to_page_cache_locked - add a locked page to the pagecache
+ * @page: page to add
+ * @mapping: the page's address_space
+ * @offset: page index
+ * @gfp_mask: page allocation mode
+ *
+ * This function is used to add a page to the pagecache. It must be locked.
+ * This function does not add the page to the LRU. The caller must do that.
+ */
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+			     pgoff_t offset, gfp_t gfp_mask)
+{
+	return __add_to_page_cache_locked(page, mapping, offset,
+					  gfp_mask, NULL);
+}
 EXPORT_SYMBOL(add_to_page_cache_locked);
 
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 			  pgoff_t offset, gfp_t gfp_mask)
 {
+	void *shadow = NULL;
 	int ret;
 
-	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0)
-		lru_cache_add_file(page);
+	__set_page_locked(page);
+	ret = __add_to_page_cache_locked(page, mapping, offset,
+					 gfp_mask, &shadow);
+	if (unlikely(ret))
+		__clear_page_locked(page);
+	else {
+		/*
+		 * The page might have been evicted from cache only
+		 * recently, in which case it should be activated like
+		 * any other repeatedly accessed page.
+		 */
+		if (shadow && workingset_refault(shadow)) {
+			SetPageActive(page);
+			workingset_activation(page);
+		} else
+			ClearPageActive(page);
+		lru_cache_add(page);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);

mm/swap.c

Lines changed: 2 additions & 0 deletions
@@ -574,6 +574,8 @@ void mark_page_accessed(struct page *page)
 		else
 			__lru_cache_activate_page(page);
 		ClearPageReferenced(page);
+		if (page_is_file_cache(page))
+			workingset_activation(page);
 	} else if (!PageReferenced(page)) {
 		SetPageReferenced(page);
 	}

mm/vmscan.c

Lines changed: 19 additions & 5 deletions
@@ -523,7 +523,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
  * Same as remove_mapping, but if the page is removed from the mapping, it
  * gets returned with a refcount of 0.
  */
-static int __remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page,
+			    bool reclaimed)
 {
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -569,10 +570,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swapcache_free(swap, page);
 	} else {
 		void (*freepage)(struct page *);
+		void *shadow = NULL;
 
 		freepage = mapping->a_ops->freepage;
-
-		__delete_from_page_cache(page, NULL);
+		/*
+		 * Remember a shadow entry for reclaimed file cache in
+		 * order to detect refaults, thus thrashing, later on.
+		 *
+		 * But don't store shadows in an address space that is
+		 * already exiting.  This is not just an optimization,
+		 * inode reclaim needs to empty out the radix tree or
+		 * the nodes are lost.  Don't plant shadows behind its
+		 * back.
+		 */
+		if (reclaimed && page_is_file_cache(page) &&
+		    !mapping_exiting(mapping))
+			shadow = workingset_eviction(mapping, page);
+		__delete_from_page_cache(page, shadow);
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 
@@ -595,7 +609,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
  */
 int remove_mapping(struct address_space *mapping, struct page *page)
 {
-	if (__remove_mapping(mapping, page)) {
+	if (__remove_mapping(mapping, page, false)) {
 		/*
 		 * Unfreezing the refcount with 1 rather than 2 effectively
 		 * drops the pagecache ref for us without requiring another
@@ -1065,7 +1079,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-		if (!mapping || !__remove_mapping(mapping, page))
+		if (!mapping || !__remove_mapping(mapping, page, true))
 			goto keep_locked;
 
 		/*

mm/vmstat.c

Lines changed: 2 additions & 0 deletions
@@ -770,6 +770,8 @@ const char * const vmstat_text[] = {
 	"numa_local",
 	"numa_other",
 #endif
+	"workingset_refault",
+	"workingset_activate",
 	"nr_anon_transparent_hugepages",
 	"nr_free_cma",
 	"nr_dirty_threshold",
