Skip to content

Commit 911fa9b

Browse files
Chandan Babu Rsmb49
Chandan Babu R
authored and committed
xfs: Fix false ENOSPC when performing direct write on a delalloc extent in cow fork
BugLink: https://bugs.launchpad.net/bugs/2050038 [ Upstream commit d621133 ] On a highly fragmented filesystem a Direct IO write can fail with -ENOSPC error even though the filesystem has sufficient number of free blocks. This occurs if the file offset range on which the write operation is being performed has a delalloc extent in the cow fork and this delalloc extent begins much before the Direct IO range. In such a scenario, xfs_reflink_allocate_cow() invokes xfs_bmapi_write() to allocate the blocks mapped by the delalloc extent. The extent thus allocated may not cover the beginning of file offset range on which the Direct IO write was issued. Hence xfs_reflink_allocate_cow() ends up returning -ENOSPC. The following script reliably recreates the bug described above. #!/usr/bin/bash device=/dev/loop0 shortdev=$(basename $device) mntpnt=/mnt/ file1=${mntpnt}/file1 file2=${mntpnt}/file2 fragmentedfile=${mntpnt}/fragmentedfile punchprog=/root/repos/xfstests-dev/src/punch-alternating errortag=/sys/fs/xfs/${shortdev}/errortag/bmap_alloc_minlen_extent umount $device > /dev/null 2>&1 echo "Create FS" mkfs.xfs -f -m reflink=1 $device > /dev/null 2>&1 if [[ $? != 0 ]]; then echo "mkfs failed." exit 1 fi echo "Mount FS" mount $device $mntpnt > /dev/null 2>&1 if [[ $? != 0 ]]; then echo "mount failed." 
exit 1 fi echo "Create source file" xfs_io -f -c "pwrite 0 32M" $file1 > /dev/null 2>&1 sync echo "Create Reflinked file" xfs_io -f -c "reflink $file1" $file2 &>/dev/null echo "Set cowextsize" xfs_io -c "cowextsize 16M" $file1 > /dev/null 2>&1 echo "Fragment FS" xfs_io -f -c "pwrite 0 64M" $fragmentedfile > /dev/null 2>&1 sync $punchprog $fragmentedfile echo "Allocate block sized extent from now onwards" echo -n 1 > $errortag echo "Create 16MiB delalloc extent in CoW fork" xfs_io -c "pwrite 0 4k" $file1 > /dev/null 2>&1 sync echo "Direct I/O write at offset 12k" xfs_io -d -c "pwrite 12k 8k" $file1 This commit fixes the bug by invoking xfs_bmapi_write() in a loop until disk blocks are allocated for at least the starting file offset of the Direct IO write range. Fixes: 3c68d44 ("xfs: allocate direct I/O COW blocks in iomap_begin") Reported-and-Root-caused-by: Wengang Wang <[email protected]> Signed-off-by: Chandan Babu R <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]> [djwong: slight editing to make the locking less grody, and fix some style things] Signed-off-by: Darrick J. Wong <[email protected]> Signed-off-by: Leah Rumancik <[email protected]> Acked-by: Chandan Babu R <[email protected]> Signed-off-by: Sasha Levin <[email protected]> Signed-off-by: Portia Stephens <[email protected]> Signed-off-by: Stefan Bader <[email protected]>
1 parent 272d2b6 commit 911fa9b

File tree

1 file changed

+163
-35
lines changed

1 file changed

+163
-35
lines changed

fs/xfs/xfs_reflink.c

Lines changed: 163 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,41 @@ xfs_find_trim_cow_extent(
340340
return 0;
341341
}
342342

343-
/* Allocate all CoW reservations covering a range of blocks in a file. */
344-
int
345-
xfs_reflink_allocate_cow(
343+
static int
344+
xfs_reflink_convert_unwritten(
345+
struct xfs_inode *ip,
346+
struct xfs_bmbt_irec *imap,
347+
struct xfs_bmbt_irec *cmap,
348+
bool convert_now)
349+
{
350+
xfs_fileoff_t offset_fsb = imap->br_startoff;
351+
xfs_filblks_t count_fsb = imap->br_blockcount;
352+
int error;
353+
354+
/*
355+
* cmap might be larger than imap due to cowextsize hint.
356+
*/
357+
xfs_trim_extent(cmap, offset_fsb, count_fsb);
358+
359+
/*
360+
* COW fork extents are supposed to remain unwritten until we're ready
361+
* to initiate a disk write. For direct I/O we are going to write the
362+
* data and need the conversion, but for buffered writes we're done.
363+
*/
364+
if (!convert_now || cmap->br_state == XFS_EXT_NORM)
365+
return 0;
366+
367+
trace_xfs_reflink_convert_cow(ip, cmap);
368+
369+
error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
370+
if (!error)
371+
cmap->br_state = XFS_EXT_NORM;
372+
373+
return error;
374+
}
375+
376+
static int
377+
xfs_reflink_fill_cow_hole(
346378
struct xfs_inode *ip,
347379
struct xfs_bmbt_irec *imap,
348380
struct xfs_bmbt_irec *cmap,
@@ -351,25 +383,12 @@ xfs_reflink_allocate_cow(
351383
bool convert_now)
352384
{
353385
struct xfs_mount *mp = ip->i_mount;
354-
xfs_fileoff_t offset_fsb = imap->br_startoff;
355-
xfs_filblks_t count_fsb = imap->br_blockcount;
356386
struct xfs_trans *tp;
357-
int nimaps, error = 0;
358-
bool found;
359387
xfs_filblks_t resaligned;
360-
xfs_extlen_t resblks = 0;
361-
362-
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
363-
if (!ip->i_cowfp) {
364-
ASSERT(!xfs_is_reflink_inode(ip));
365-
xfs_ifork_init_cow(ip);
366-
}
367-
368-
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
369-
if (error || !*shared)
370-
return error;
371-
if (found)
372-
goto convert;
388+
xfs_extlen_t resblks;
389+
int nimaps;
390+
int error;
391+
bool found;
373392

374393
resaligned = xfs_aligned_fsb_count(imap->br_startoff,
375394
imap->br_blockcount, xfs_get_cowextsz_hint(ip));
@@ -385,17 +404,17 @@ xfs_reflink_allocate_cow(
385404

386405
*lockmode = XFS_ILOCK_EXCL;
387406

388-
/*
389-
* Check for an overlapping extent again now that we dropped the ilock.
390-
*/
391407
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
392408
if (error || !*shared)
393409
goto out_trans_cancel;
410+
394411
if (found) {
395412
xfs_trans_cancel(tp);
396413
goto convert;
397414
}
398415

416+
ASSERT(cmap->br_startoff > imap->br_startoff);
417+
399418
/* Allocate the entire reservation as unwritten blocks. */
400419
nimaps = 1;
401420
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
@@ -415,26 +434,135 @@ xfs_reflink_allocate_cow(
415434
*/
416435
if (nimaps == 0)
417436
return -ENOSPC;
437+
418438
convert:
419-
xfs_trim_extent(cmap, offset_fsb, count_fsb);
420-
/*
421-
* COW fork extents are supposed to remain unwritten until we're ready
422-
* to initiate a disk write. For direct I/O we are going to write the
423-
* data and need the conversion, but for buffered writes we're done.
424-
*/
425-
if (!convert_now || cmap->br_state == XFS_EXT_NORM)
426-
return 0;
427-
trace_xfs_reflink_convert_cow(ip, cmap);
428-
error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
429-
if (!error)
430-
cmap->br_state = XFS_EXT_NORM;
439+
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
440+
441+
out_trans_cancel:
442+
xfs_trans_cancel(tp);
431443
return error;
444+
}
445+
446+
static int
447+
xfs_reflink_fill_delalloc(
448+
struct xfs_inode *ip,
449+
struct xfs_bmbt_irec *imap,
450+
struct xfs_bmbt_irec *cmap,
451+
bool *shared,
452+
uint *lockmode,
453+
bool convert_now)
454+
{
455+
struct xfs_mount *mp = ip->i_mount;
456+
struct xfs_trans *tp;
457+
int nimaps;
458+
int error;
459+
bool found;
460+
461+
do {
462+
xfs_iunlock(ip, *lockmode);
463+
*lockmode = 0;
464+
465+
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0,
466+
false, &tp);
467+
if (error)
468+
return error;
469+
470+
*lockmode = XFS_ILOCK_EXCL;
471+
472+
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared,
473+
&found);
474+
if (error || !*shared)
475+
goto out_trans_cancel;
476+
477+
if (found) {
478+
xfs_trans_cancel(tp);
479+
break;
480+
}
481+
482+
ASSERT(isnullstartblock(cmap->br_startblock) ||
483+
cmap->br_startblock == DELAYSTARTBLOCK);
484+
485+
/*
486+
* Replace delalloc reservation with an unwritten extent.
487+
*/
488+
nimaps = 1;
489+
error = xfs_bmapi_write(tp, ip, cmap->br_startoff,
490+
cmap->br_blockcount,
491+
XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0,
492+
cmap, &nimaps);
493+
if (error)
494+
goto out_trans_cancel;
495+
496+
xfs_inode_set_cowblocks_tag(ip);
497+
error = xfs_trans_commit(tp);
498+
if (error)
499+
return error;
500+
501+
/*
502+
* Allocation succeeded but the requested range was not even
503+
* partially satisfied? Bail out!
504+
*/
505+
if (nimaps == 0)
506+
return -ENOSPC;
507+
} while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
508+
509+
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
432510

433511
out_trans_cancel:
434512
xfs_trans_cancel(tp);
435513
return error;
436514
}
437515

516+
/* Allocate all CoW reservations covering a range of blocks in a file. */
517+
int
518+
xfs_reflink_allocate_cow(
519+
struct xfs_inode *ip,
520+
struct xfs_bmbt_irec *imap,
521+
struct xfs_bmbt_irec *cmap,
522+
bool *shared,
523+
uint *lockmode,
524+
bool convert_now)
525+
{
526+
int error;
527+
bool found;
528+
529+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
530+
if (!ip->i_cowfp) {
531+
ASSERT(!xfs_is_reflink_inode(ip));
532+
xfs_ifork_init_cow(ip);
533+
}
534+
535+
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
536+
if (error || !*shared)
537+
return error;
538+
539+
/* CoW fork has a real extent */
540+
if (found)
541+
return xfs_reflink_convert_unwritten(ip, imap, cmap,
542+
convert_now);
543+
544+
/*
545+
* CoW fork does not have an extent and data extent is shared.
546+
* Allocate a real extent in the CoW fork.
547+
*/
548+
if (cmap->br_startoff > imap->br_startoff)
549+
return xfs_reflink_fill_cow_hole(ip, imap, cmap, shared,
550+
lockmode, convert_now);
551+
552+
/*
553+
* CoW fork has a delalloc reservation. Replace it with a real extent.
554+
* There may or may not be a data fork mapping.
555+
*/
556+
if (isnullstartblock(cmap->br_startblock) ||
557+
cmap->br_startblock == DELAYSTARTBLOCK)
558+
return xfs_reflink_fill_delalloc(ip, imap, cmap, shared,
559+
lockmode, convert_now);
560+
561+
/* Shouldn't get here. */
562+
ASSERT(0);
563+
return -EFSCORRUPTED;
564+
}
565+
438566
/*
439567
* Cancel CoW reservations for some block range of an inode.
440568
*

0 commit comments

Comments
 (0)