@@ -3298,7 +3298,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
                 if (ret)
                         goto out_put;
 
-                ret = btrfs_truncate_free_space_cache(root, trans, inode);
+                ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                 if (ret)
                         goto out_put;
         }
@@ -3382,20 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
         return 0;
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS.  This is
+ * required in order for the cache to actually match the block group,
+ * but can introduce a lot of latency into the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO.  There's a chance we'll have to redo some of it if the
+ * block group changes again during the commit, but it greatly reduces
+ * the commit latency by getting rid of the easy block groups while
+ * we're still allowing others to join the commit.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
 {
         struct btrfs_block_group_cache *cache;
         struct btrfs_transaction *cur_trans = trans->transaction;
         int ret = 0;
         int should_put;
-        struct btrfs_path *path;
-        LIST_HEAD(io);
+        struct btrfs_path *path = NULL;
+        LIST_HEAD(dirty);
+        struct list_head *io = &cur_trans->io_bgs;
         int num_started = 0;
-        int num_waited = 0;
+        int loops = 0;
+
+        spin_lock(&cur_trans->dirty_bgs_lock);
+        if (!list_empty(&cur_trans->dirty_bgs)) {
+                list_splice_init(&cur_trans->dirty_bgs, &dirty);
+        }
+        spin_unlock(&cur_trans->dirty_bgs_lock);
 
-        if (list_empty(&cur_trans->dirty_bgs))
+again:
+        if (list_empty(&dirty)) {
+                btrfs_free_path(path);
                 return 0;
+        }
+
+        /*
+         * make sure all the block groups on our dirty list actually
+         * exist
+         */
+        btrfs_create_pending_block_groups(trans, root);
+
+        if (!path) {
+                path = btrfs_alloc_path();
+                if (!path)
+                        return -ENOMEM;
+        }
+
+        while (!list_empty(&dirty)) {
+                cache = list_first_entry(&dirty,
+                                         struct btrfs_block_group_cache,
+                                         dirty_list);
+
+                /*
+                 * cache_write_mutex is here only to save us from balance
+                 * deleting this block group while we are writing out the
+                 * cache
+                 */
+                mutex_lock(&trans->transaction->cache_write_mutex);
+
+                /*
+                 * this can happen if something re-dirties a block
+                 * group that is already under IO.  Just wait for it to
+                 * finish and then do it all again
+                 */
+                if (!list_empty(&cache->io_list)) {
+                        list_del_init(&cache->io_list);
+                        btrfs_wait_cache_io(root, trans, cache,
+                                            &cache->io_ctl, path,
+                                            cache->key.objectid);
+                        btrfs_put_block_group(cache);
+                }
+
+
+                /*
+                 * btrfs_wait_cache_io uses the cache->dirty_list to decide
+                 * if it should update the cache_state.  Don't delete
+                 * until after we wait.
+                 *
+                 * Since we're not running in the commit critical section
+                 * we need the dirty_bgs_lock to protect from update_block_group
+                 */
+                spin_lock(&cur_trans->dirty_bgs_lock);
+                list_del_init(&cache->dirty_list);
+                spin_unlock(&cur_trans->dirty_bgs_lock);
+
+                should_put = 1;
+
+                cache_save_setup(cache, trans, path);
+
+                if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+                        cache->io_ctl.inode = NULL;
+                        ret = btrfs_write_out_cache(root, trans, cache, path);
+                        if (ret == 0 && cache->io_ctl.inode) {
+                                num_started++;
+                                should_put = 0;
+
+                                /*
+                                 * the cache_write_mutex is protecting
+                                 * the io_list
+                                 */
+                                list_add_tail(&cache->io_list, io);
+                        } else {
+                                /*
+                                 * if we failed to write the cache, the
+                                 * generation will be bad and life goes on
+                                 */
+                                ret = 0;
+                        }
+                }
+                if (!ret)
+                        ret = write_one_cache_group(trans, root, path, cache);
+                mutex_unlock(&trans->transaction->cache_write_mutex);
+
+                /* if it's not on the io list, we need to put the block group */
+                if (should_put)
+                        btrfs_put_block_group(cache);
+
+                if (ret)
+                        break;
+        }
+
+        /*
+         * go through delayed refs for all the stuff we've just kicked off
+         * and then loop back (just once)
+         */
+        ret = btrfs_run_delayed_refs(trans, root, 0);
+        if (!ret && loops == 0) {
+                loops++;
+                spin_lock(&cur_trans->dirty_bgs_lock);
+                list_splice_init(&cur_trans->dirty_bgs, &dirty);
+                spin_unlock(&cur_trans->dirty_bgs_lock);
+                goto again;
+        }
+
+        btrfs_free_path(path);
+        return ret;
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root)
+{
+        struct btrfs_block_group_cache *cache;
+        struct btrfs_transaction *cur_trans = trans->transaction;
+        int ret = 0;
+        int should_put;
+        struct btrfs_path *path;
+        struct list_head *io = &cur_trans->io_bgs;
+        int num_started = 0;
 
         path = btrfs_alloc_path();
         if (!path)
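
The new btrfs_start_dirty_block_groups() above splices the transaction's dirty
list onto a private list under dirty_bgs_lock, writes everything out with the
lock dropped, runs delayed refs, and loops exactly once over whatever was
re-dirtied in the meantime; stragglers are left for the commit-time writeback.
The following is a minimal, hypothetical userspace sketch of that
splice-and-retry shape, not the btrfs API: a singly linked list and a pthread
mutex stand in for the dirty_bgs list and its spinlock.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct bg {                             /* stand-in for a dirty block group */
        int id;
        struct bg *next;
};

static struct bg *dirty_bgs;            /* shared "dirty" list */
static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;

static void mark_dirty(int id)
{
        struct bg *b = malloc(sizeof(*b));

        if (!b)
                return;
        b->id = id;
        pthread_mutex_lock(&dirty_bgs_lock);
        b->next = dirty_bgs;
        dirty_bgs = b;
        pthread_mutex_unlock(&dirty_bgs_lock);
}

/* early writeback: grab what is dirty now, leave stragglers for commit */
static void start_dirty_block_groups(void)
{
        struct bg *local, *b;
        int loops = 0;

again:
        /* splice the shared list onto a private one under the lock */
        pthread_mutex_lock(&dirty_bgs_lock);
        local = dirty_bgs;
        dirty_bgs = NULL;
        pthread_mutex_unlock(&dirty_bgs_lock);

        /* the actual writeback runs with the lock dropped */
        while ((b = local)) {
                local = b->next;
                printf("writing out block group %d\n", b->id);
                free(b);
        }

        /* anything re-dirtied meanwhile gets one more pass, then we stop */
        if (loops++ == 0)
                goto again;
}

int main(void)
{
        mark_dirty(1);
        mark_dirty(2);
        start_dirty_block_groups();
        return 0;
}
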
@@ -3423,14 +3559,16 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                             &cache->io_ctl, path,
                                             cache->key.objectid);
                         btrfs_put_block_group(cache);
-                        num_waited++;
                 }
 
+                /*
+                 * don't remove from the dirty list until after we've waited
+                 * on any pending IO
+                 */
                 list_del_init(&cache->dirty_list);
                 should_put = 1;
 
-                if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-                        cache_save_setup(cache, trans, path);
+                cache_save_setup(cache, trans, path);
 
                 if (!ret)
                         ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
@@ -3441,7 +3579,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                         if (ret == 0 && cache->io_ctl.inode) {
                                 num_started++;
                                 should_put = 0;
-                                list_add_tail(&cache->io_list, &io);
+                                list_add_tail(&cache->io_list, io);
                         } else {
                                 /*
                                  * if we failed to write the cache, the
@@ -3458,11 +3596,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 btrfs_put_block_group(cache);
         }
 
-        while (!list_empty(&io)) {
-                cache = list_first_entry(&io, struct btrfs_block_group_cache,
+        while (!list_empty(io)) {
+                cache = list_first_entry(io, struct btrfs_block_group_cache,
                                          io_list);
                 list_del_init(&cache->io_list);
-                num_waited++;
                 btrfs_wait_cache_io(root, trans, cache,
                                     &cache->io_ctl, path, cache->key.objectid);
                 btrfs_put_block_group(cache);
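
Both writeback paths lean on the kernel's intrusive list primitives:
list_add_tail() queues a block group on the per-transaction io_bgs list, and a
list_first_entry()/list_del_init() loop like the one ending
btrfs_write_dirty_block_groups drains it again. For readers less used to that
API, here is a small self-contained approximation; the list helpers mimic
include/linux/list.h, and the block_group struct is a made-up stand-in rather
than the real btrfs definition.

#include <stddef.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name)    { &(name), &(name) }

static void INIT_LIST_HEAD(struct list_head *h)
{
        h->next = h;
        h->prev = h;
}

static int list_empty(const struct list_head *h)
{
        return h->next == h;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

static void list_del_init(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        INIT_LIST_HEAD(entry);
}

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_first_entry(head, type, member) \
        container_of((head)->next, type, member)

struct block_group {                    /* stand-in, not btrfs_block_group_cache */
        int id;
        struct list_head io_list;
};

int main(void)
{
        struct list_head io = LIST_HEAD_INIT(io);       /* like cur_trans->io_bgs */
        struct block_group a = { 1 }, b = { 2 };
        struct block_group *bg;

        list_add_tail(&a.io_list, &io);
        list_add_tail(&b.io_list, &io);

        /* same shape as the drain loop above: pop, unlink, wait on its IO */
        while (!list_empty(&io)) {
                bg = list_first_entry(&io, struct block_group, io_list);
                list_del_init(&bg->io_list);
                printf("waiting on cache IO for block group %d\n", bg->id);
        }
        return 0;
}
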
@@ -5459,15 +5596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                 if (!alloc && cache->cached == BTRFS_CACHE_NO)
                         cache_block_group(cache, 1);
 
-                spin_lock(&trans->transaction->dirty_bgs_lock);
-                if (list_empty(&cache->dirty_list)) {
-                        list_add_tail(&cache->dirty_list,
-                                      &trans->transaction->dirty_bgs);
-                        trans->transaction->num_dirty_bgs++;
-                        btrfs_get_block_group(cache);
-                }
-                spin_unlock(&trans->transaction->dirty_bgs_lock);
-
                 byte_in_group = bytenr - cache->key.objectid;
                 WARN_ON(byte_in_group > cache->key.offset);
 
@@ -5516,6 +5644,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                                 spin_unlock(&info->unused_bgs_lock);
                         }
                 }
+
+                spin_lock(&trans->transaction->dirty_bgs_lock);
+                if (list_empty(&cache->dirty_list)) {
+                        list_add_tail(&cache->dirty_list,
+                                      &trans->transaction->dirty_bgs);
+                        trans->transaction->num_dirty_bgs++;
+                        btrfs_get_block_group(cache);
+                }
+                spin_unlock(&trans->transaction->dirty_bgs_lock);
+
                 btrfs_put_block_group(cache);
                 total -= num_bytes;
                 bytenr += num_bytes;
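
With this change update_block_group() queues the block group on the
transaction's dirty list at the end of the accounting, and only if it is not
already queued: the list_empty() check under dirty_bgs_lock makes the insertion
idempotent, and btrfs_get_block_group() pins the cache for as long as it sits
on the list (the writeback loops above drop that reference once they unlink
it). Below is a hedged sketch of that add-at-most-once, pin-while-listed idiom;
the names are invented, a boolean stands in for the list membership test, and a
plain int replaces the atomic refcount, so it only shows the shape.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
        int refs;               /* simplified: btrfs uses an atomic refcount */
        bool on_dirty_list;     /* stands in for !list_empty(&cache->dirty_list) */
};

static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;
static int num_dirty_bgs;

static void cache_get(struct cache *c)
{
        c->refs++;
}

static void cache_put(struct cache *c)
{
        if (--c->refs == 0)
                printf("cache freed\n");
}

/* caller already holds its own reference, as update_block_group does */
static void mark_cache_dirty(struct cache *c)
{
        pthread_mutex_lock(&dirty_bgs_lock);
        if (!c->on_dirty_list) {
                c->on_dirty_list = true;
                num_dirty_bgs++;
                cache_get(c);           /* the dirty list itself pins the cache */
        }
        pthread_mutex_unlock(&dirty_bgs_lock);
}

/* writeback drops the list's reference once the cache leaves the list */
static void clear_cache_dirty(struct cache *c)
{
        bool was_listed;

        pthread_mutex_lock(&dirty_bgs_lock);
        was_listed = c->on_dirty_list;
        if (was_listed) {
                c->on_dirty_list = false;
                num_dirty_bgs--;
        }
        pthread_mutex_unlock(&dirty_bgs_lock);
        if (was_listed)
                cache_put(c);
}

int main(void)
{
        struct cache c = { .refs = 1 };

        mark_cache_dirty(&c);
        mark_cache_dirty(&c);           /* second call is a no-op: already listed */
        printf("dirty block groups: %d\n", num_dirty_bgs);
        clear_cache_dirty(&c);
        cache_put(&c);                  /* caller's reference; prints "cache freed" */
        return 0;
}
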
@@ -8602,10 +8740,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
         BUG_ON(cache->ro);
 
+again:
         trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
 
+        /*
+         * we're not allowed to set block groups readonly after the dirty
+         * block groups cache has started writing.  If it already started,
+         * back off and let this transaction commit
+         */
+        mutex_lock(&root->fs_info->ro_block_group_mutex);
+        if (trans->transaction->dirty_bg_run) {
+                u64 transid = trans->transid;
+
+                mutex_unlock(&root->fs_info->ro_block_group_mutex);
+                btrfs_end_transaction(trans, root);
+
+                ret = btrfs_wait_for_commit(root, transid);
+                if (ret)
+                        return ret;
+                goto again;
+        }
+
+
         ret = set_block_group_ro(cache, 0);
         if (!ret)
                 goto out;
@@ -8620,6 +8778,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                 alloc_flags = update_block_group_flags(root, cache->flags);
                 check_system_chunk(trans, root, alloc_flags);
         }
+        mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
         btrfs_end_transaction(trans, root);
         return ret;
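
btrfs_set_block_group_ro() now has to cope with the early writeback: once
dirty_bg_run is set for the running transaction, flipping a block group
read-only would invalidate cache pages that are already on their way to disk,
so the function drops ro_block_group_mutex, waits for that transaction to
commit, and retries against the next one. The transid is sampled while the
mutex is still held, so the retry waits on the right transaction. Here is a
rough userspace sketch of that back-off-and-retry idiom; it deliberately
collapses the mutex and the commit-wait machinery into one lock/condvar pair,
and every name in it is hypothetical rather than btrfs API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ro_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t commit_done = PTHREAD_COND_INITIALIZER;
static unsigned long committed_gen;     /* last committed "transid" */
static unsigned long running_gen = 1;   /* currently running "transid" */
static bool dirty_bg_run = true;        /* writeback already started for it */

static void wait_for_commit(unsigned long transid)
{
        pthread_mutex_lock(&ro_lock);
        while (committed_gen < transid)
                pthread_cond_wait(&commit_done, &ro_lock);
        pthread_mutex_unlock(&ro_lock);
}

static void set_block_group_read_only(void)
{
        unsigned long transid;

again:
        pthread_mutex_lock(&ro_lock);
        if (dirty_bg_run) {
                /* too late for this transaction: back off, wait, retry */
                transid = running_gen;
                pthread_mutex_unlock(&ro_lock);
                wait_for_commit(transid);
                goto again;
        }
        printf("set read-only in transaction %lu\n", running_gen);
        pthread_mutex_unlock(&ro_lock);
}

static void *committer(void *arg)
{
        (void)arg;
        sleep(1);                       /* pretend to finish the writeback */
        pthread_mutex_lock(&ro_lock);
        committed_gen = running_gen;    /* the old transaction commits */
        running_gen++;                  /* a new one starts, nothing dirty yet */
        dirty_bg_run = false;
        pthread_cond_broadcast(&commit_done);
        pthread_mutex_unlock(&ro_lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, committer, NULL);
        set_block_group_read_only();    /* backs off once, then succeeds */
        pthread_join(t, NULL);
        return 0;
}
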
@@ -9425,7 +9584,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                 goto out;
         }
 
+        /*
+         * get the inode first so any iput calls done for the io_list
+         * aren't the final iput (no unlinks allowed now)
+         */
         inode = lookup_free_space_inode(tree_root, block_group, path);
+
+        mutex_lock(&trans->transaction->cache_write_mutex);
+        /*
+         * make sure our free space cache IO is done before removing the
+         * free space inode
+         */
+        spin_lock(&trans->transaction->dirty_bgs_lock);
+        if (!list_empty(&block_group->io_list)) {
+                list_del_init(&block_group->io_list);
+
+                WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+                spin_unlock(&trans->transaction->dirty_bgs_lock);
+                btrfs_wait_cache_io(root, trans, block_group,
+                                    &block_group->io_ctl, path,
+                                    block_group->key.objectid);
+                btrfs_put_block_group(block_group);
+                spin_lock(&trans->transaction->dirty_bgs_lock);
+        }
+
+        if (!list_empty(&block_group->dirty_list)) {
+                list_del_init(&block_group->dirty_list);
+                btrfs_put_block_group(block_group);
+        }
+        spin_unlock(&trans->transaction->dirty_bgs_lock);
+        mutex_unlock(&trans->transaction->cache_write_mutex);
+
         if (!IS_ERR(inode)) {
                 ret = btrfs_orphan_add(trans, inode);
                 if (ret) {
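
The removal path has to get the block group off both per-transaction lists
before the free space inode goes away. It holds cache_write_mutex across the
whole sequence so the writeback loops above cannot slip in and restart cache
IO, and it drops dirty_bgs_lock around btrfs_wait_cache_io() because that wait
can sleep. A minimal sketch of the lock ordering follows; two pthread mutexes
model the kernel mutex/spinlock pair, the block_group struct is invented, and
in the kernel the inner lock is a spinlock, which is exactly why it cannot be
held across the wait.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct block_group {
        bool on_io_list;                /* like !list_empty(&bg->io_list) */
        bool on_dirty_list;             /* like !list_empty(&bg->dirty_list) */
};

static pthread_mutex_t cache_write_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_cache_io(struct block_group *bg)
{
        (void)bg;
        /* may sleep: only the outer mutex is held, never the inner lock */
        printf("waiting for in-flight free space cache IO\n");
}

static void remove_block_group(struct block_group *bg)
{
        pthread_mutex_lock(&cache_write_mutex);
        pthread_mutex_lock(&dirty_bgs_lock);

        if (bg->on_io_list) {
                bg->on_io_list = false;
                /* drop the inner lock before anything that can sleep */
                pthread_mutex_unlock(&dirty_bgs_lock);
                wait_cache_io(bg);
                pthread_mutex_lock(&dirty_bgs_lock);
        }

        if (bg->on_dirty_list)
                bg->on_dirty_list = false;

        pthread_mutex_unlock(&dirty_bgs_lock);
        pthread_mutex_unlock(&cache_write_mutex);
        printf("block group off both lists, safe to remove\n");
}

int main(void)
{
        struct block_group bg = { .on_io_list = true, .on_dirty_list = true };

        remove_block_group(&bg);
        return 0;
}
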
@@ -9518,11 +9708,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
         spin_lock(&trans->transaction->dirty_bgs_lock);
         if (!list_empty(&block_group->dirty_list)) {
-                list_del_init(&block_group->dirty_list);
-                btrfs_put_block_group(block_group);
+                WARN_ON(1);
+        }
+        if (!list_empty(&block_group->io_list)) {
+                WARN_ON(1);
         }
         spin_unlock(&trans->transaction->dirty_bgs_lock);
-
         btrfs_remove_free_space_cache(block_group);
 
         spin_lock(&block_group->space_info->lock);