
Commit 39802d1

mmarcinismb49 authored and committed
RDMA/core: Fix umem iterator when PAGE_SIZE is greater than HCA pgsz
BugLink: https://bugs.launchpad.net/bugs/2050858

[ Upstream commit 4fbc3a5 ]

64k pages introduce the situation in this diagram when the HCA 4k page
size is being used:

+-------------------------------------------+ <--- 64k aligned VA
|                                           |
|                HCA 4k page                |
|                                           |
+-------------------------------------------+
|                   o                       |
|                                           |
|                   o                       |
|                                           |
|                   o                       |
+-------------------------------------------+
|                                           |
|                HCA 4k page                |
|                                           |
+-------------------------------------------+ <--- Live HCA page
|OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO| <--- offset
|                                           | <--- VA
|                  MR data                  |
+-------------------------------------------+
|                                           |
|                HCA 4k page                |
|                                           |
+-------------------------------------------+
|                   o                       |
|                                           |
|                   o                       |
|                                           |
|                   o                       |
+-------------------------------------------+
|                                           |
|                HCA 4k page                |
|                                           |
+-------------------------------------------+

The VA addresses coming from rdma-core in this diagram can be arbitrary,
but for 64k pages the VA may be offset into the 64k page by some number
of HCA 4k pages and followed by some number of HCA 4k pages. The current
iterator accounts for neither the preceding nor the following 4k pages.

Fix the issue by extending the ib_block_iter to contain the number of
DMA pages, like comment [1] says, and by using __sg_advance to start the
iterator at the first live HCA page.

The changes are contained in a parallel set of iterator start and next
functions that are umem aware and specific to umem, since there is one
user of rdma_for_each_block() without a umem.

These two fixes prevent the extra pages before and after the user MR
data. Fix the preceding pages by using the __sg_advance field to start
at the first 4k page containing MR data. Fix the following pages by
saving the number of pgsz blocks in the iterator state and downcounting
on each next.

This fix allows for the elimination of the small page crutch noted in
the Fixes tag.

Fixes: 10c75cc ("RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz()")
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Mike Marciniszyn <[email protected]>
Signed-off-by: Shiraz Saleem <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
Signed-off-by: Portia Stephens <[email protected]>
Signed-off-by: Stefan Bader <[email protected]>
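To make the preceding/following page accounting concrete, here is a standalone worked example (not part of the patch) that reproduces the iterator-start arithmetic for an assumed 64k PAGE_SIZE kernel and 4k HCA page size; the VA and MR length below are made-up illustrative values, and the names only mirror the kernel fields.

/* Standalone sketch (userspace C, illustrative only): the same arithmetic
 * the new umem iterator start performs, with hypothetical example values.
 */
#include <stdio.h>

#define EX_PAGE_SIZE	0x10000UL	/* assumed 64k kernel page size */
#define EX_HCA_PGSZ	0x1000UL	/* assumed 4k HCA page size */

int main(void)
{
	unsigned long va  = 0x12345000UL;	/* hypothetical MR start VA */
	unsigned long len = 0x6000UL;		/* hypothetical MR length (24k) */

	/* ib_umem_offset(): offset of the VA inside its 64k kernel page */
	unsigned long umem_offset = va & (EX_PAGE_SIZE - 1);		/* 0x5000 */

	/* __sg_advance: skip the leading 4k HCA pages holding no MR data */
	unsigned long sg_advance = umem_offset & ~(EX_HCA_PGSZ - 1);	/* 0x5000 */

	/* ib_umem_num_dma_blocks(): 4k blocks that actually hold MR data */
	unsigned long first = va & ~(EX_HCA_PGSZ - 1);
	unsigned long last  = (va + len + EX_HCA_PGSZ - 1) & ~(EX_HCA_PGSZ - 1);
	unsigned long sg_numblocks = (last - first) / EX_HCA_PGSZ;	/* 6 */

	printf("advance=0x%lx blocks=%lu\n", sg_advance, sg_numblocks);
	return 0;
}

With these numbers the iterator skips the five leading 4k pages of the 64k page and stops after the six blocks that contain MR data, rather than walking every 4k piece of the 64k page.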
1 parent c007591 commit 39802d1

3 files changed: +9 -7 lines


drivers/infiniband/core/umem.c

Lines changed: 0 additions & 6 deletions
@@ -98,12 +98,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 		return page_size;
 	}
 
-	/* rdma_for_each_block() has a bug if the page size is smaller than the
-	 * page size used to build the umem. For now prevent smaller page sizes
-	 * from being returned.
-	 */
-	pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
-
 	/* The best result is the smallest page size that results in the minimum
 	 * number of required pages. Compute the largest page size that could
 	 * work based on VA address bits that don't change.
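For context, a hedged sketch of how a driver might call ib_umem_find_best_pgsz() during MR registration now that the mask above is gone; the capability bitmap and helper name are illustrative, not taken from any particular driver. With the crutch removed, a page size smaller than the kernel PAGE_SIZE (e.g. 4k on a 64k-page kernel) can again be returned when that is what the HCA supports.

/* Illustrative only: select the HCA page size for a newly pinned user MR.
 * MY_HCA_SUPPORTED_PGSZS is a hypothetical capability bitmap; a real
 * driver passes the page sizes its hardware can actually program.
 */
#include <linux/errno.h>
#include <linux/sizes.h>
#include <rdma/ib_umem.h>

#define MY_HCA_SUPPORTED_PGSZS	(SZ_4K | SZ_64K | SZ_2M)

static int my_mr_select_pgsz(struct ib_umem *umem, unsigned long virt,
			     unsigned long *pgsz_out)
{
	unsigned long pgsz;

	/* With the crutch removed, this may now return SZ_4K even on a
	 * kernel built with 64k pages, provided the MR layout allows it.
	 */
	pgsz = ib_umem_find_best_pgsz(umem, MY_HCA_SUPPORTED_PGSZS, virt);
	if (!pgsz)
		return -EINVAL;	/* layout not representable by the HCA */

	*pgsz_out = pgsz;
	return 0;
}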

include/rdma/ib_umem.h

Lines changed: 8 additions & 1 deletion
@@ -85,6 +85,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
 {
 	__rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
 				umem->sgt_append.sgt.nents, pgsz);
+	biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
+	biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
+}
+
+static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
+{
+	return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
 }
 
 /**
@@ -100,7 +107,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
 	for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
-	     __rdma_block_iter_next(biter);)
+	     __rdma_umem_block_iter_next(biter);)
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
include/rdma/ib_verbs.h

Lines changed: 1 addition & 0 deletions
@@ -2817,6 +2817,7 @@ struct ib_block_iter {
 	/* internal states */
 	struct scatterlist *__sg;	/* sg holding the current aligned block */
 	dma_addr_t __dma_addr;		/* unaligned DMA address of this block */
+	size_t __sg_numblocks;		/* ib_umem_num_dma_blocks() */
 	unsigned int __sg_nents;	/* number of SG entries */
 	unsigned int __sg_advance;	/* number of bytes to advance in sg in next step */
 	unsigned int __pg_bit;		/* alignment of current block */
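Putting the pieces together, a minimal consumer-side sketch (not from this commit) of walking a umem in HCA-sized DMA blocks with the fixed macro; fill_page_list() and page_list are hypothetical names, while the macro and accessor are the existing kernel API.

/* Hypothetical driver helper: with the umem-aware iterator, this loop
 * starts at the first HCA page that actually holds MR data and emits
 * exactly ib_umem_num_dma_blocks(umem, pgsz) addresses, even when
 * PAGE_SIZE (e.g. 64k) is larger than the HCA page size (e.g. 4k).
 */
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static void fill_page_list(struct ib_umem *umem, unsigned long pgsz,
			   u64 *page_list)
{
	struct ib_block_iter biter;
	unsigned int i = 0;

	rdma_umem_for_each_dma_block(umem, &biter, pgsz)
		page_list[i++] = rdma_block_iter_dma_address(&biter);
}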
