
Commit eb9d7a6

aikmpe authored and committed
powerpc/mm_iommu: Fix potential deadlock
Currently mm_iommu_do_alloc() is called in 2 cases:

- VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl() for normal memory: this locks
  &mem_list_mutex and then locks mm::mmap_sem several times when adjusting
  locked_vm or pinning pages;

- vfio_pci_nvgpu_regops::mmap() for GPU memory: this is called with
  mm::mmap_sem held already and it locks &mem_list_mutex.

So one can craft a userspace program to do special ioctl and mmap in
2 threads concurrently and cause a deadlock which lockdep warns about
(below). We did not hit this yet because QEMU constructs the machine in
a single thread.

This moves the overlap check next to where the new entry is added and
reduces the amount of time spent with &mem_list_mutex held.

This moves locked_vm adjustment from under &mem_list_mutex. This relies
on mm_iommu_adjust_locked_vm() doing nothing when entries==0.

This is one of the lockdep warnings:

======================================================
WARNING: possible circular locking dependency detected
5.1.0-rc2-le_nv2_aikATfstn1-p1 #363 Not tainted
------------------------------------------------------
qemu-system-ppc/8038 is trying to acquire lock:
000000002ec6c453 (mem_list_mutex){+.+.}, at: mm_iommu_do_alloc+0x70/0x490

but task is already holding lock:
00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&mm->mmap_sem){++++}:
       lock_acquire+0xf8/0x260
       down_write+0x44/0xa0
       mm_iommu_adjust_locked_vm.part.1+0x4c/0x190
       mm_iommu_do_alloc+0x310/0x490
       tce_iommu_ioctl.part.9+0xb84/0x1150 [vfio_iommu_spapr_tce]
       vfio_fops_unl_ioctl+0x94/0x430 [vfio]
       do_vfs_ioctl+0xe4/0x930
       ksys_ioctl+0xc4/0x110
       sys_ioctl+0x28/0x80
       system_call+0x5c/0x70

-> #0 (mem_list_mutex){+.+.}:
       __lock_acquire+0x1484/0x1900
       lock_acquire+0xf8/0x260
       __mutex_lock+0x88/0xa70
       mm_iommu_do_alloc+0x70/0x490
       vfio_pci_nvgpu_mmap+0xc0/0x130 [vfio_pci]
       vfio_pci_mmap+0x198/0x2a0 [vfio_pci]
       vfio_device_fops_mmap+0x44/0x70 [vfio]
       mmap_region+0x5d4/0x770
       do_mmap+0x42c/0x650
       vm_mmap_pgoff+0x124/0x160
       ksys_mmap_pgoff+0xdc/0x2f0
       sys_mmap+0x40/0x80
       system_call+0x5c/0x70

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(mem_list_mutex);
                               lock(&mm->mmap_sem);
  lock(mem_list_mutex);

 *** DEADLOCK ***

1 lock held by qemu-system-ppc/8038:
 #0: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160

Fixes: c10c21e ("powerpc/vfio/iommu/kvm: Do not pin device memory", 2018-12-19)
Signed-off-by: Alexey Kardashevskiy <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
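The inversion is easiest to see in isolation. Below is a minimal, purely
illustrative pthread sketch of the two paths named above: lock_a and lock_b
are stand-ins for mm::mmap_sem and mem_list_mutex (a real reproducer would
instead drive the VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl() and the vfio
mmap() from two threads, which requires VFIO container setup not shown
here).

/* Stand-alone sketch of the AB-BA inversion; build with: cc -pthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* ~ mm::mmap_sem */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* ~ mem_list_mutex */

/* Pre-patch ioctl() path: mem_list_mutex first, then mmap_sem. */
static void *ioctl_path(void *arg)
{
        pthread_mutex_lock(&lock_b);
        pthread_mutex_lock(&lock_a);    /* can block behind mmap_path() */
        puts("ioctl path: got both locks");
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
}

/* mmap() path: mmap_sem is already held, then mem_list_mutex is taken. */
static void *mmap_path(void *arg)
{
        pthread_mutex_lock(&lock_a);
        pthread_mutex_lock(&lock_b);    /* can block behind ioctl_path() */
        puts("mmap path: got both locks");
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, ioctl_path, NULL);
        pthread_create(&t2, NULL, mmap_path, NULL);
        pthread_join(t1, NULL); /* hangs forever if the two paths interleave */
        pthread_join(t2, NULL);
        return 0;
}

If the scheduler interleaves the threads between their first and second
acquisitions, each ends up holding the lock the other wants, matching the
"Possible unsafe locking scenario" table above.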
1 parent 8adddf3 commit eb9d7a6

1 file changed: +39 −36

arch/powerpc/mm/mmu_context_iommu.c

@@ -95,28 +95,14 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 		unsigned long entries, unsigned long dev_hpa,
 		struct mm_iommu_table_group_mem_t **pmem)
 {
-	struct mm_iommu_table_group_mem_t *mem;
-	long i, ret, locked_entries = 0;
+	struct mm_iommu_table_group_mem_t *mem, *mem2;
+	long i, ret, locked_entries = 0, pinned = 0;
 	unsigned int pageshift;
 
-	mutex_lock(&mem_list_mutex);
-
-	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
-			next) {
-		/* Overlap? */
-		if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
-				(ua < (mem->ua +
-				       (mem->entries << PAGE_SHIFT)))) {
-			ret = -EINVAL;
-			goto unlock_exit;
-		}
-
-	}
-
 	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
 		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
 		if (ret)
-			goto unlock_exit;
+			return ret;
 
 		locked_entries = entries;
 	}
@@ -150,15 +136,10 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 	down_read(&mm->mmap_sem);
 	ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
 	up_read(&mm->mmap_sem);
+	pinned = ret > 0 ? ret : 0;
 	if (ret != entries) {
-		/* free the reference taken */
-		for (i = 0; i < ret; i++)
-			put_page(mem->hpages[i]);
-
-		vfree(mem->hpas);
-		kfree(mem);
 		ret = -EFAULT;
-		goto unlock_exit;
+		goto free_exit;
 	}
 
 	pageshift = PAGE_SHIFT;
@@ -183,21 +164,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 	}
 
 good_exit:
-	ret = 0;
 	atomic64_set(&mem->mapped, 1);
 	mem->used = 1;
 	mem->ua = ua;
 	mem->entries = entries;
-	*pmem = mem;
 
-	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+	mutex_lock(&mem_list_mutex);
 
-unlock_exit:
-	if (locked_entries && ret)
-		mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+		/* Overlap? */
+		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+				(ua < (mem2->ua +
+				       (mem2->entries << PAGE_SHIFT)))) {
+			ret = -EINVAL;
+			mutex_unlock(&mem_list_mutex);
+			goto free_exit;
+		}
+	}
+
+	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
 
 	mutex_unlock(&mem_list_mutex);
 
+	*pmem = mem;
+
+	return 0;
+
+free_exit:
+	/* free the reference taken */
+	for (i = 0; i < pinned; i++)
+		put_page(mem->hpages[i]);
+
+	vfree(mem->hpas);
+	kfree(mem);
+
+unlock_exit:
+	mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+
 	return ret;
 }
 
@@ -266,7 +269,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
 long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 {
 	long ret = 0;
-	unsigned long entries, dev_hpa;
+	unsigned long unlock_entries = 0;
 
 	mutex_lock(&mem_list_mutex);
 
@@ -287,17 +290,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 		goto unlock_exit;
 	}
 
+	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+		unlock_entries = mem->entries;
+
 	/* @mapped became 0 so now mappings are disabled, release the region */
-	entries = mem->entries;
-	dev_hpa = mem->dev_hpa;
 	mm_iommu_release(mem);
 
-	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
-		mm_iommu_adjust_locked_vm(mm, entries, false);
-
 unlock_exit:
 	mutex_unlock(&mem_list_mutex);
 
+	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mm_iommu_put);
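As the commit message notes, the now-unconditional mm_iommu_adjust_locked_vm()
calls on these exit paths are safe because the helper does nothing when asked
to adjust by zero entries: locked_entries and unlock_entries stay 0 for device
memory (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA), so the calls are no-ops there.

The effect on lock ordering can be checked against the sketch after the commit
message: replacing its ioctl_path() with the variant below (again illustrative
only) removes the cycle, because the mem_list_mutex stand-in is never held
while waiting for the mmap_sem stand-in. This mirrors how mm_iommu_do_alloc()
now drops mmap_sem after pinning pages and only then takes mem_list_mutex.

/* Drop-in replacement for ioctl_path() in the earlier sketch, mirroring
 * the patched mm_iommu_do_alloc() locking order. */
static void *ioctl_path(void *arg)
{
        pthread_mutex_lock(&lock_a);    /* pin pages under mmap_sem... */
        pthread_mutex_unlock(&lock_a);  /* ...and drop it */

        pthread_mutex_lock(&lock_b);    /* overlap check + list_add_rcu() */
        pthread_mutex_unlock(&lock_b);

        puts("ioctl path: done, no inversion possible");
        return NULL;
}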
