
Commit b1934cd

naota authored and kdave committed
btrfs: zoned: handle broken write pointer on zones
Btrfs refuses to mount a filesystem if it finds a block group with a broken write pointer (e.g. unequal write pointers on the two zones of a RAID1 block group). Since such a case can easily occur after a power loss or system crash, we need to handle it more gently.

Handle such a block group by making it unallocatable, so that there will be no writes into it. That can be done by setting the allocation pointer to the end of the allocatable region (= block_group->zone_capacity). Then, the existing code handles zone_unusable properly.

Having a proper zone_capacity is necessary for this change, so set it as early as possible. We cannot handle the RAID0 and RAID10 cases the same way, but those block groups are unreadable anyway because of the missing stripe.

Fixes: 265f723 ("btrfs: zoned: allow DUP on meta-data block groups")
Fixes: 568220f ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree")
CC: [email protected] # 6.1+
Reported-by: HAN Yuwei <[email protected]>
Cc: Xuefer <[email protected]>
Signed-off-by: Naohiro Aota <[email protected]>
Signed-off-by: David Sterba <[email protected]>
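[Editorial illustration] A minimal user-space sketch of the two ideas the patch combines: picking a usable zone_capacity while ignoring the zero capacity of a missing device (the kernel uses the min_not_zero() macro for this), and making a block group unallocatable by parking its allocation pointer at the end of the allocatable region. The struct and helper below are simplified stand-ins, not the kernel's own definitions:

#include <stdio.h>

/* Simplified stand-in for struct btrfs_block_group (illustrative only). */
struct bg {
        unsigned long long alloc_offset;  /* write pointer / next allocatable byte */
        unsigned long long zone_capacity; /* end of the allocatable region */
};

/* User-space version of the kernel's min_not_zero(): a capacity of 0
 * marks a missing device and must not win the comparison. */
static unsigned long long min_not_zero_u64(unsigned long long a,
                                           unsigned long long b)
{
        if (a == 0)
                return b;
        if (b == 0)
                return a;
        return a < b ? a : b;
}

int main(void)
{
        struct bg bg = { .alloc_offset = 0 };

        /* Device 1 is missing and reports capacity 0; take device 0's. */
        bg.zone_capacity = min_not_zero_u64(1ULL << 30, 0);

        /* Broken write pointer detected: park the allocation pointer at the
         * end of the allocatable region so nothing gets written here. */
        bg.alloc_offset = bg.zone_capacity;

        printf("allocatable bytes left: %llu\n",
               bg.zone_capacity - bg.alloc_offset); /* prints 0 */
        return 0;
}

Once alloc_offset equals zone_capacity, the allocator sees no free space in the group; relocating the block group later rebuilds consistent write pointers.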
1 parent c346c62 commit b1934cd

File tree

1 file changed (+25, -5)


fs/btrfs/zoned.c

Lines changed: 25 additions & 5 deletions

@@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
                 return -EINVAL;
         }
 
+        bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
         if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
                 btrfs_err(bg->fs_info,
                           "zoned: cannot recover write pointer for zone %llu",
@@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
         }
 
         bg->alloc_offset = zone_info[0].alloc_offset;
-        bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
         return 0;
 }
 
@@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
                 return -EINVAL;
         }
 
+        /* In case a device is missing we have a cap of 0, so don't use it. */
+        bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
         for (i = 0; i < map->num_stripes; i++) {
                 if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
                     zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
                         if (test_bit(0, active))
                                 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
                 }
-                /* In case a device is missing we have a cap of 0, so don't use it. */
-                bg->zone_capacity = min_not_zero(zone_info[0].capacity,
-                                                 zone_info[1].capacity);
         }
 
         if (zone_info[0].alloc_offset != WP_MISSING_DEV)
@@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
         unsigned long *active = NULL;
         u64 last_alloc = 0;
         u32 num_sequential = 0, num_conventional = 0;
+        u64 profile;
 
         if (!btrfs_is_zoned(fs_info))
                 return 0;
@@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                 }
         }
 
-        switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+        profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+        switch (profile) {
         case 0: /* single */
                 ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
                 break;
@@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                 goto out;
         }
 
+        if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
+            profile != BTRFS_BLOCK_GROUP_RAID10) {
+                /*
+                 * Detected broken write pointer. Make this block group
+                 * unallocatable by setting the allocation pointer at the end of
+                 * allocatable region. Relocating this block group will fix the
+                 * mismatch.
+                 *
+                 * Currently, we cannot handle RAID0 or RAID10 case like this
+                 * because we don't have a proper zone_capacity value. But,
+                 * reading from this block group won't work anyway by a missing
+                 * stripe.
+                 */
+                cache->alloc_offset = cache->zone_capacity;
+                ret = 0;
+        }
+
 out:
         /* Reject non SINGLE data profiles without RST */
         if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
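
[Editorial note] Why parking alloc_offset at zone_capacity is sufficient: the zoned free-space accounting then classifies the whole group as unusable. The sketch below assumes the accounting has the shape of btrfs_calc_zone_unusable() in fs/btrfs/zoned.c around the time of this commit; treat the exact formula as an assumption for illustration:

#include <stdio.h>

/*
 * Assumed shape of the zoned unusable-space accounting (modeled on
 * btrfs_calc_zone_unusable()): bytes written but since freed, plus the
 * tail of the block group beyond the zone capacity.
 */
static unsigned long long zone_unusable(unsigned long long length,
                                        unsigned long long zone_capacity,
                                        unsigned long long alloc_offset,
                                        unsigned long long used)
{
        return (alloc_offset - used) + (length - zone_capacity);
}

int main(void)
{
        unsigned long long length = 1ULL << 30;  /* example block group size */
        unsigned long long cap = 900ULL << 20;   /* example zone capacity */

        /* With the fallback, alloc_offset == zone_capacity; an empty but
         * broken group (used == 0) becomes 100% unusable, so the
         * allocator never writes into it. */
        printf("unusable: %llu of %llu bytes\n",
               zone_unusable(length, cap, cap, 0), length);
        return 0;
}

With used == 0 and alloc_offset == zone_capacity, the result equals length, i.e. the entire block group is accounted unusable until it is relocated.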
