Skip to content

Commit 9254ada

Browse files
djbw authored and gregkh committed
device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation
commit 956a4cd upstream.

The following warning triggers with a new unit test that stresses the
device-dax interface.

    ===============================
    [ ERR: suspicious RCU usage.  ]
    4.11.0-rc4+ #1049 Tainted: G O
    -------------------------------
    ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!

    other info that might help us debug this:

    rcu_scheduler_active = 2, debug_locks = 0
    2 locks held by fio/9070:
     #0: (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
     #1: (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]

    Call Trace:
     dump_stack+0x86/0xc3
     lockdep_rcu_suspicious+0xd7/0x110
     ___might_sleep+0xac/0x250
     __might_sleep+0x4a/0x80
     __alloc_pages_nodemask+0x23a/0x360
     alloc_pages_current+0xa1/0x1f0
     pte_alloc_one+0x17/0x80
     __pte_alloc+0x1e/0x120
     __get_locked_pte+0x1bf/0x1d0
     insert_pfn.isra.70+0x3a/0x100
     ? lookup_memtype+0xa6/0xd0
     vm_insert_mixed+0x64/0x90
     dax_dev_huge_fault+0x520/0x620 [dax]
     ? dax_dev_huge_fault+0x32/0x620 [dax]
     dax_dev_fault+0x10/0x20 [dax]
     __do_fault+0x1e/0x140
     __handle_mm_fault+0x9af/0x10d0
     handle_mm_fault+0x16d/0x370
     ? handle_mm_fault+0x47/0x370
     __do_page_fault+0x28c/0x4f0
     trace_do_page_fault+0x58/0x2a0
     do_async_page_fault+0x1a/0xa0
     async_page_fault+0x28/0x30

Inserting a page table entry may trigger an allocation while we are
holding a read lock to keep the device instance alive for the duration
of the fault. Use srcu for this keep-alive protection.

Fixes: dee4107 ("/dev/dax, core: file operations and dax-mmap")
Signed-off-by: Dan Williams <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 7d1c1be commit 9254ada

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

drivers/dax/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
22
tristate "DAX: direct access to differentiated memory"
33
default m if NVDIMM_DAX
44
depends on TRANSPARENT_HUGEPAGE
5+
select SRCU
56
help
67
Support raw access to differentiated (persistence, bandwidth,
78
latency...) memory via an mmap(2) capable character

drivers/dax/dax.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "dax.h"
2525

2626
static dev_t dax_devt;
27+
DEFINE_STATIC_SRCU(dax_srcu);
2728
static struct class *dax_class;
2829
static DEFINE_IDA(dax_minor_ida);
2930
static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -59,7 +60,7 @@ struct dax_region {
5960
* @region - parent region
6061
* @dev - device backing the character device
6162
* @cdev - core chardev data
62-
* @alive - !alive + rcu grace period == no new mappings can be established
63+
* @alive - !alive + srcu grace period == no new mappings can be established
6364
* @id - child id in the region
6465
* @num_resources - number of physical address extents in this device
6566
* @res - array of physical address ranges
@@ -530,17 +531,17 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
530531
static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
531532
pmd_t *pmd, unsigned int flags)
532533
{
533-
int rc;
534+
int rc, id;
534535
struct file *filp = vma->vm_file;
535536
struct dax_dev *dax_dev = filp->private_data;
536537

537538
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
538539
current->comm, (flags & FAULT_FLAG_WRITE)
539540
? "write" : "read", vma->vm_start, vma->vm_end);
540541

541-
rcu_read_lock();
542+
id = srcu_read_lock(&dax_srcu);
542543
rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
543-
rcu_read_unlock();
544+
srcu_read_unlock(&dax_srcu, id);
544545

545546
return rc;
546547
}
@@ -656,11 +657,11 @@ static void unregister_dax_dev(void *dev)
656657
* Note, rcu is not protecting the liveness of dax_dev, rcu is
657658
* ensuring that any fault handlers that might have seen
658659
* dax_dev->alive == true, have completed. Any fault handlers
659-
* that start after synchronize_rcu() has started will abort
660+
* that start after synchronize_srcu() has started will abort
660661
* upon seeing dax_dev->alive == false.
661662
*/
662663
dax_dev->alive = false;
663-
synchronize_rcu();
664+
synchronize_srcu(&dax_srcu);
664665
unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
665666
cdev_del(cdev);
666667
device_unregister(dev);

0 commit comments

Comments
 (0)