Skip to content

Commit 6290602

Browse files
npiggin authored and torvalds committed
mm: add PageWaiters indicating tasks are waiting for a page bit
Add a new page flag, PageWaiters, to indicate the page waitqueue has tasks waiting. This can be tested rather than testing waitqueue_active which requires another cacheline load. This bit is always set when the page has tasks on page_waitqueue(page), and is set and cleared under the waitqueue lock. It may be set when there are no tasks on the waitqueue, which will cause a harmless extra wakeup check that will clears the bit. The generic bit-waitqueue infrastructure is no longer used for pages. Instead, waitqueues are used directly with a custom key type. The generic code was not flexible enough to have PageWaiters manipulation under the waitqueue lock (which simplifies concurrency). This improves the performance of page lock intensive microbenchmarks by 2-3%. Putting two bits in the same word opens the opportunity to remove the memory barrier between clearing the lock bit and testing the waiters bit, after some work on the arch primitives (e.g., ensuring memory operand widths match and cover both bits). Signed-off-by: Nicholas Piggin <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Bob Peterson <[email protected]> Cc: Steven Whitehouse <[email protected]> Cc: Andrew Lutomirski <[email protected]> Cc: Andreas Gruenbacher <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Mel Gorman <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 6326fec commit 6290602

File tree

9 files changed

+174
-50
lines changed

9 files changed

+174
-50
lines changed

include/linux/mm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
17581758
return ptl;
17591759
}
17601760

1761+
extern void __init pagecache_init(void);
1762+
17611763
extern void free_area_init(unsigned long * zones_size);
17621764
extern void free_area_init_node(int nid, unsigned long * zones_size,
17631765
unsigned long zone_start_pfn, unsigned long *zholes_size);

include/linux/page-flags.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
*/
7474
enum pageflags {
7575
PG_locked, /* Page is locked. Don't touch. */
76+
PG_waiters, /* Page has waiters, check its waitqueue */
7677
PG_error,
7778
PG_referenced,
7879
PG_uptodate,
@@ -169,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
169170
* for compound page all operations related to the page flag applied to
170171
* head page.
171172
*
173+
* PF_ONLY_HEAD:
174+
* for compound page, callers only ever operate on the head page.
175+
*
172176
* PF_NO_TAIL:
173177
* modifications of the page flag must be done on small or head pages,
174178
* checks can be done on tail pages too.
@@ -178,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
178182
*/
179183
#define PF_ANY(page, enforce) page
180184
#define PF_HEAD(page, enforce) compound_head(page)
185+
#define PF_ONLY_HEAD(page, enforce) ({ \
186+
VM_BUG_ON_PGFLAGS(PageTail(page), page); \
187+
page;})
181188
#define PF_NO_TAIL(page, enforce) ({ \
182189
VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
183190
compound_head(page);})
@@ -255,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
255262
TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
256263

257264
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
265+
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
258266
PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
259267
PAGEFLAG(Referenced, referenced, PF_HEAD)
260268
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -743,6 +751,7 @@ static inline int page_has_private(struct page *page)
743751

744752
#undef PF_ANY
745753
#undef PF_HEAD
754+
#undef PF_ONLY_HEAD
746755
#undef PF_NO_TAIL
747756
#undef PF_NO_COMPOUND
748757
#endif /* !__GENERATING_BOUNDS_H */

include/linux/pagemap.h

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
486486
* and for filesystems which need to wait on PG_private.
487487
*/
488488
extern void wait_on_page_bit(struct page *page, int bit_nr);
489-
490489
extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
491-
extern int wait_on_page_bit_killable_timeout(struct page *page,
492-
int bit_nr, unsigned long timeout);
493-
494-
static inline int wait_on_page_locked_killable(struct page *page)
495-
{
496-
if (!PageLocked(page))
497-
return 0;
498-
return wait_on_page_bit_killable(compound_head(page), PG_locked);
499-
}
490+
extern void wake_up_page_bit(struct page *page, int bit_nr);
500491

501-
extern wait_queue_head_t *page_waitqueue(struct page *page);
502492
static inline void wake_up_page(struct page *page, int bit)
503493
{
504-
__wake_up_bit(page_waitqueue(page), &page->flags, bit);
494+
if (!PageWaiters(page))
495+
return;
496+
wake_up_page_bit(page, bit);
505497
}
506498

507499
/*
@@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
517509
wait_on_page_bit(compound_head(page), PG_locked);
518510
}
519511

512+
static inline int wait_on_page_locked_killable(struct page *page)
513+
{
514+
if (!PageLocked(page))
515+
return 0;
516+
return wait_on_page_bit_killable(compound_head(page), PG_locked);
517+
}
518+
520519
/*
521520
* Wait for a page to complete writeback
522521
*/

include/linux/writeback.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
375375
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
376376

377377
void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
378-
void page_writeback_init(void);
379378
void balance_dirty_pages_ratelimited(struct address_space *mapping);
380379
bool wb_over_bg_thresh(struct bdi_writeback *wb);
381380

include/trace/events/mmflags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181

8282
#define __def_pageflag_names \
8383
{1UL << PG_locked, "locked" }, \
84+
{1UL << PG_waiters, "waiters" }, \
8485
{1UL << PG_error, "error" }, \
8586
{1UL << PG_referenced, "referenced" }, \
8687
{1UL << PG_uptodate, "uptodate" }, \

init/main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
647647
security_init();
648648
dbg_late_init();
649649
vfs_caches_init();
650+
pagecache_init();
650651
signals_init();
651-
/* rootfs populating might need page-writeback */
652-
page_writeback_init();
653652
proc_root_init();
654653
nsfs_init();
655654
cpuset_init();

mm/filemap.c

Lines changed: 146 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
739739
* at a cost of "thundering herd" phenomena during rare hash
740740
* collisions.
741741
*/
742-
wait_queue_head_t *page_waitqueue(struct page *page)
742+
#define PAGE_WAIT_TABLE_BITS 8
743+
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
744+
static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
745+
746+
static wait_queue_head_t *page_waitqueue(struct page *page)
743747
{
744-
return bit_waitqueue(page, 0);
748+
return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
745749
}
746-
EXPORT_SYMBOL(page_waitqueue);
747750

748-
void wait_on_page_bit(struct page *page, int bit_nr)
751+
void __init pagecache_init(void)
749752
{
750-
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
753+
int i;
751754

752-
if (test_bit(bit_nr, &page->flags))
753-
__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
754-
TASK_UNINTERRUPTIBLE);
755+
for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
756+
init_waitqueue_head(&page_wait_table[i]);
757+
758+
page_writeback_init();
755759
}
756-
EXPORT_SYMBOL(wait_on_page_bit);
757760

758-
int wait_on_page_bit_killable(struct page *page, int bit_nr)
761+
struct wait_page_key {
762+
struct page *page;
763+
int bit_nr;
764+
int page_match;
765+
};
766+
767+
struct wait_page_queue {
768+
struct page *page;
769+
int bit_nr;
770+
wait_queue_t wait;
771+
};
772+
773+
static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
759774
{
760-
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
775+
struct wait_page_key *key = arg;
776+
struct wait_page_queue *wait_page
777+
= container_of(wait, struct wait_page_queue, wait);
778+
779+
if (wait_page->page != key->page)
780+
return 0;
781+
key->page_match = 1;
761782

762-
if (!test_bit(bit_nr, &page->flags))
783+
if (wait_page->bit_nr != key->bit_nr)
784+
return 0;
785+
if (test_bit(key->bit_nr, &key->page->flags))
763786
return 0;
764787

765-
return __wait_on_bit(page_waitqueue(page), &wait,
766-
bit_wait_io, TASK_KILLABLE);
788+
return autoremove_wake_function(wait, mode, sync, key);
767789
}
768790

769-
int wait_on_page_bit_killable_timeout(struct page *page,
770-
int bit_nr, unsigned long timeout)
791+
void wake_up_page_bit(struct page *page, int bit_nr)
771792
{
772-
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
793+
wait_queue_head_t *q = page_waitqueue(page);
794+
struct wait_page_key key;
795+
unsigned long flags;
773796

774-
wait.key.timeout = jiffies + timeout;
775-
if (!test_bit(bit_nr, &page->flags))
776-
return 0;
777-
return __wait_on_bit(page_waitqueue(page), &wait,
778-
bit_wait_io_timeout, TASK_KILLABLE);
797+
key.page = page;
798+
key.bit_nr = bit_nr;
799+
key.page_match = 0;
800+
801+
spin_lock_irqsave(&q->lock, flags);
802+
__wake_up_locked_key(q, TASK_NORMAL, &key);
803+
/*
804+
* It is possible for other pages to have collided on the waitqueue
805+
* hash, so in that case check for a page match. That prevents a long-
806+
* term waiter
807+
*
808+
* It is still possible to miss a case here, when we woke page waiters
809+
* and removed them from the waitqueue, but there are still other
810+
* page waiters.
811+
*/
812+
if (!waitqueue_active(q) || !key.page_match) {
813+
ClearPageWaiters(page);
814+
/*
815+
* It's possible to miss clearing Waiters here, when we woke
816+
* our page waiters, but the hashed waitqueue has waiters for
817+
* other pages on it.
818+
*
819+
* That's okay, it's a rare case. The next waker will clear it.
820+
*/
821+
}
822+
spin_unlock_irqrestore(&q->lock, flags);
823+
}
824+
EXPORT_SYMBOL(wake_up_page_bit);
825+
826+
static inline int wait_on_page_bit_common(wait_queue_head_t *q,
827+
struct page *page, int bit_nr, int state, bool lock)
828+
{
829+
struct wait_page_queue wait_page;
830+
wait_queue_t *wait = &wait_page.wait;
831+
int ret = 0;
832+
833+
init_wait(wait);
834+
wait->func = wake_page_function;
835+
wait_page.page = page;
836+
wait_page.bit_nr = bit_nr;
837+
838+
for (;;) {
839+
spin_lock_irq(&q->lock);
840+
841+
if (likely(list_empty(&wait->task_list))) {
842+
if (lock)
843+
__add_wait_queue_tail_exclusive(q, wait);
844+
else
845+
__add_wait_queue(q, wait);
846+
SetPageWaiters(page);
847+
}
848+
849+
set_current_state(state);
850+
851+
spin_unlock_irq(&q->lock);
852+
853+
if (likely(test_bit(bit_nr, &page->flags))) {
854+
io_schedule();
855+
if (unlikely(signal_pending_state(state, current))) {
856+
ret = -EINTR;
857+
break;
858+
}
859+
}
860+
861+
if (lock) {
862+
if (!test_and_set_bit_lock(bit_nr, &page->flags))
863+
break;
864+
} else {
865+
if (!test_bit(bit_nr, &page->flags))
866+
break;
867+
}
868+
}
869+
870+
finish_wait(q, wait);
871+
872+
/*
873+
* A signal could leave PageWaiters set. Clearing it here if
874+
* !waitqueue_active would be possible (by open-coding finish_wait),
875+
* but still fail to catch it in the case of wait hash collision. We
876+
* already can fail to clear wait hash collision cases, so don't
877+
* bother with signals either.
878+
*/
879+
880+
return ret;
881+
}
882+
883+
void wait_on_page_bit(struct page *page, int bit_nr)
884+
{
885+
wait_queue_head_t *q = page_waitqueue(page);
886+
wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
887+
}
888+
EXPORT_SYMBOL(wait_on_page_bit);
889+
890+
int wait_on_page_bit_killable(struct page *page, int bit_nr)
891+
{
892+
wait_queue_head_t *q = page_waitqueue(page);
893+
return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
779894
}
780-
EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
781895

782896
/**
783897
* add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
@@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
793907

794908
spin_lock_irqsave(&q->lock, flags);
795909
__add_wait_queue(q, waiter);
910+
SetPageWaiters(page);
796911
spin_unlock_irqrestore(&q->lock, flags);
797912
}
798913
EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
874989
* __lock_page - get a lock on the page, assuming we need to sleep to get it
875990
* @page: the page to lock
876991
*/
877-
void __lock_page(struct page *page)
992+
void __lock_page(struct page *__page)
878993
{
879-
struct page *page_head = compound_head(page);
880-
DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
881-
882-
__wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
883-
TASK_UNINTERRUPTIBLE);
994+
struct page *page = compound_head(__page);
995+
wait_queue_head_t *q = page_waitqueue(page);
996+
wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
884997
}
885998
EXPORT_SYMBOL(__lock_page);
886999

887-
int __lock_page_killable(struct page *page)
1000+
int __lock_page_killable(struct page *__page)
8881001
{
889-
struct page *page_head = compound_head(page);
890-
DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
891-
892-
return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
893-
bit_wait_io, TASK_KILLABLE);
1002+
struct page *page = compound_head(__page);
1003+
wait_queue_head_t *q = page_waitqueue(page);
1004+
return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
8941005
}
8951006
EXPORT_SYMBOL_GPL(__lock_page_killable);
8961007

mm/internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
/* Do not use these with a slab allocator */
3737
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
3838

39+
void page_writeback_init(void);
40+
3941
int do_swap_page(struct vm_fault *vmf);
4042

4143
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,

mm/swap.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
6969
del_page_from_lru_list(page, lruvec, page_off_lru(page));
7070
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
7171
}
72+
__ClearPageWaiters(page);
7273
mem_cgroup_uncharge(page);
7374
}
7475

@@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)
784785

785786
/* Clear Active bit in case of parallel mark_page_accessed */
786787
__ClearPageActive(page);
788+
__ClearPageWaiters(page);
787789

788790
list_add(&page->lru, &pages_to_free);
789791
}

0 commit comments

Comments (0)