Skip to content

Commit c8585c6

Browse files
daehojeong authored and tytso committed
ext4: fix races between changing inode journal mode and ext4_writepages
In ext4, there is a race condition between changing an inode's journal mode and ext4_writepages(). While ext4_writepages() is executing on an inode in non-journaled mode, the inode's journal mode can be enabled via ioctl(), and some pages dirtied after the journal mode switch will then still be exposed to ext4_writepages() running in non-journaled mode. To resolve this problem, we use a filesystem-wide per-cpu rw semaphore, at Jan Kara's suggestion, because we don't want to waste space in ext4_inode_info for this extremely rare case. Signed-off-by: Daeho Jeong <[email protected]> Signed-off-by: Theodore Ts'o <[email protected]> Reviewed-by: Jan Kara <[email protected]>
1 parent 4c54659 commit c8585c6

File tree

4 files changed

+21
-3
lines changed

4 files changed

+21
-3
lines changed

fs/ext4/ext4.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <linux/ratelimit.h>
3434
#include <crypto/hash.h>
3535
#include <linux/falloc.h>
36+
#include <linux/percpu-rwsem.h>
3637
#ifdef __KERNEL__
3738
#include <linux/compat.h>
3839
#endif
@@ -1508,6 +1509,9 @@ struct ext4_sb_info {
15081509
struct ratelimit_state s_err_ratelimit_state;
15091510
struct ratelimit_state s_warning_ratelimit_state;
15101511
struct ratelimit_state s_msg_ratelimit_state;
1512+
1513+
/* Barrier between changing inodes' journal flags and writepages ops. */
1514+
struct percpu_rw_semaphore s_journal_flag_rwsem;
15111515
};
15121516

15131517
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)

fs/ext4/inode.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping,
26122612
struct blk_plug plug;
26132613
bool give_up_on_write = false;
26142614

2615+
percpu_down_read(&sbi->s_journal_flag_rwsem);
26152616
trace_ext4_writepages(inode, wbc);
26162617

2617-
if (dax_mapping(mapping))
2618-
return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
2619-
wbc);
2618+
if (dax_mapping(mapping)) {
2619+
ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
2620+
wbc);
2621+
goto out_writepages;
2622+
}
26202623

26212624
/*
26222625
* No pages to write? This is mainly a kludge to avoid starting
@@ -2786,6 +2789,7 @@ static int ext4_writepages(struct address_space *mapping,
27862789
out_writepages:
27872790
trace_ext4_writepages_result(inode, wbc, ret,
27882791
nr_to_write - wbc->nr_to_write);
2792+
percpu_up_read(&sbi->s_journal_flag_rwsem);
27892793
return ret;
27902794
}
27912795

@@ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
54365440
journal_t *journal;
54375441
handle_t *handle;
54385442
int err;
5443+
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54395444

54405445
/*
54415446
* We have to be very careful here: changing a data block's
@@ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
54755480
}
54765481
}
54775482

5483+
percpu_down_write(&sbi->s_journal_flag_rwsem);
54785484
jbd2_journal_lock_updates(journal);
54795485

54805486
/*
@@ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
54915497
err = jbd2_journal_flush(journal);
54925498
if (err < 0) {
54935499
jbd2_journal_unlock_updates(journal);
5500+
percpu_up_write(&sbi->s_journal_flag_rwsem);
54945501
ext4_inode_resume_unlocked_dio(inode);
54955502
return err;
54965503
}
@@ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
54995506
ext4_set_aops(inode);
55005507

55015508
jbd2_journal_unlock_updates(journal);
5509+
percpu_up_write(&sbi->s_journal_flag_rwsem);
5510+
55025511
if (val)
55035512
up_write(&EXT4_I(inode)->i_mmap_sem);
55045513
ext4_inode_resume_unlocked_dio(inode);

fs/ext4/super.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb)
859859
percpu_counter_destroy(&sbi->s_freeinodes_counter);
860860
percpu_counter_destroy(&sbi->s_dirs_counter);
861861
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
862+
percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
862863
brelse(sbi->s_sbh);
863864
#ifdef CONFIG_QUOTA
864865
for (i = 0; i < EXT4_MAXQUOTAS; i++)
@@ -3930,6 +3931,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
39303931
if (!err)
39313932
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
39323933
GFP_KERNEL);
3934+
if (!err)
3935+
err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
3936+
39333937
if (err) {
39343938
ext4_msg(sb, KERN_ERR, "insufficient memory");
39353939
goto failed_mount6;

kernel/locking/percpu-rwsem.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
3737
free_percpu(brw->fast_read_ctr);
3838
brw->fast_read_ctr = NULL; /* catch use after free bugs */
3939
}
40+
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
4041

4142
/*
4243
* This is the fast-path for down_read/up_read. If it succeeds we rely

0 commit comments

Comments
 (0)