Skip to content

Commit 6b523df

Browse files
jankara authored and tytso committed
ext4: use transaction reservation for extent conversion in ext4_end_io
Later we would like to clear PageWriteback bit only after extent conversion from unwritten to written extents is performed. However it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and is easy to deadlock). So we have to reserve a transaction before locking pages and sending them for IO and later we use the transaction for extent conversion from ext4_end_io().

Reviewed-by: Zheng Liu <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
1 parent 3613d22 commit 6b523df

File tree

5 files changed

+69
-24
lines changed

5 files changed

+69
-24
lines changed

fs/ext4/ext4.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,13 @@ struct ext4_map_blocks {
184184
#define EXT4_IO_END_DIRECT 0x0004
185185

186186
/*
187-
* For converting uninitialized extents on a work queue.
187+
* For converting uninitialized extents on a work queue. 'handle' is used for
188+
* buffered writeback.
188189
*/
189190
typedef struct ext4_io_end {
190191
struct list_head list; /* per-file finished IO list */
192+
handle_t *handle; /* handle reserved for extent
193+
* conversion */
191194
struct inode *inode; /* file being written to */
192195
unsigned int flag; /* unwritten or not */
193196
loff_t offset; /* offset in the file */
@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
13221325
struct ext4_io_end *io_end)
13231326
{
13241327
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1328+
/* Writeback has to have conversion transaction reserved */
1329+
WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
1330+
!(io_end->flag & EXT4_IO_END_DIRECT));
13251331
io_end->flag |= EXT4_IO_END_UNWRITTEN;
13261332
atomic_inc(&EXT4_I(inode)->i_unwritten);
13271333
}
@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
25912597
extern void ext4_ext_release(struct super_block *);
25922598
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
25932599
loff_t len);
2594-
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
2595-
ssize_t len);
2600+
extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
2601+
loff_t offset, ssize_t len);
25962602
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
25972603
struct ext4_map_blocks *map, int flags);
25982604
extern int ext4_ext_calc_metadata_amount(struct inode *inode,

fs/ext4/ext4_jbd2.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
134134
#define EXT4_HT_MIGRATE 8
135135
#define EXT4_HT_MOVE_EXTENTS 9
136136
#define EXT4_HT_XATTR 10
137-
#define EXT4_HT_MAX 11
137+
#define EXT4_HT_EXT_CONVERT 11
138+
#define EXT4_HT_MAX 12
138139

139140
/**
140141
* struct ext4_journal_cb_entry - Base structure for callback information.
@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
319320
#define ext4_journal_stop(handle) \
320321
__ext4_journal_stop(__func__, __LINE__, (handle))
321322

322-
#define ext4_journal_start_reserve(handle, type) \
323+
#define ext4_journal_start_reserved(handle, type) \
323324
__ext4_journal_start_reserved((handle), __LINE__, (type))
324325

325326
handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,

fs/ext4/extents.c

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4566,10 +4566,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
45664566
* function, to convert the fallocated extents after IO is completed.
45674567
* Returns 0 on success.
45684568
*/
4569-
int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4570-
ssize_t len)
4569+
int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4570+
loff_t offset, ssize_t len)
45714571
{
4572-
handle_t *handle;
45734572
unsigned int max_blocks;
45744573
int ret = 0;
45754574
int ret2 = 0;
@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
45844583
max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
45854584
map.m_lblk);
45864585
/*
4587-
* credits to insert 1 extent into extent tree
4586+
* This is somewhat ugly but the idea is clear: When transaction is
4587+
* reserved, everything goes into it. Otherwise we rather start several
4588+
* smaller transactions for conversion of each extent separately.
45884589
*/
4589-
credits = ext4_chunk_trans_blocks(inode, max_blocks);
4590+
if (handle) {
4591+
handle = ext4_journal_start_reserved(handle,
4592+
EXT4_HT_EXT_CONVERT);
4593+
if (IS_ERR(handle))
4594+
return PTR_ERR(handle);
4595+
credits = 0;
4596+
} else {
4597+
/*
4598+
* credits to insert 1 extent into extent tree
4599+
*/
4600+
credits = ext4_chunk_trans_blocks(inode, max_blocks);
4601+
}
45904602
while (ret >= 0 && ret < max_blocks) {
45914603
map.m_lblk += ret;
45924604
map.m_len = (max_blocks -= ret);
4593-
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
4594-
if (IS_ERR(handle)) {
4595-
ret = PTR_ERR(handle);
4596-
break;
4605+
if (credits) {
4606+
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4607+
credits);
4608+
if (IS_ERR(handle)) {
4609+
ret = PTR_ERR(handle);
4610+
break;
4611+
}
45974612
}
45984613
ret = ext4_map_blocks(handle, inode, &map,
45994614
EXT4_GET_BLOCKS_IO_CONVERT_EXT);
@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
46044619
inode->i_ino, map.m_lblk,
46054620
map.m_len, ret);
46064621
ext4_mark_inode_dirty(handle, inode);
4607-
ret2 = ext4_journal_stop(handle);
4608-
if (ret <= 0 || ret2 )
4622+
if (credits)
4623+
ret2 = ext4_journal_stop(handle);
4624+
if (ret <= 0 || ret2)
46094625
break;
46104626
}
4627+
if (!credits)
4628+
ret2 = ext4_journal_stop(handle);
46114629
return ret > 0 ? ret2 : ret;
46124630
}
46134631

fs/ext4/inode.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
14101410
struct mpage_da_data {
14111411
struct inode *inode;
14121412
struct writeback_control *wbc;
1413+
14131414
pgoff_t first_page; /* The first page to write */
14141415
pgoff_t next_page; /* Current page to examine */
14151416
pgoff_t last_page; /* Last page to examine */
@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
21082109
err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
21092110
if (err < 0)
21102111
return err;
2111-
if (map->m_flags & EXT4_MAP_UNINIT)
2112+
if (map->m_flags & EXT4_MAP_UNINIT) {
2113+
if (!mpd->io_submit.io_end->handle &&
2114+
ext4_handle_valid(handle)) {
2115+
mpd->io_submit.io_end->handle = handle->h_rsv_handle;
2116+
handle->h_rsv_handle = NULL;
2117+
}
21122118
ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
2119+
}
21132120

21142121
BUG_ON(map->m_len == 0);
21152122
if (map->m_flags & EXT4_MAP_NEW) {
@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
23512358
handle_t *handle = NULL;
23522359
struct mpage_da_data mpd;
23532360
struct inode *inode = mapping->host;
2354-
int needed_blocks, ret = 0;
2361+
int needed_blocks, rsv_blocks = 0, ret = 0;
23552362
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
23562363
bool done;
23572364
struct blk_plug plug;
@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
23792386
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
23802387
return -EROFS;
23812388

2389+
if (ext4_should_dioread_nolock(inode)) {
2390+
/*
2391+
* We may need to convert up to one extent per block in
2392+
* the page and we may dirty the inode.
2393+
*/
2394+
rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
2395+
}
2396+
23822397
/*
23832398
* If we have inline data and arrive here, it means that
23842399
* we will soon create the block for the 1st page, so
@@ -2438,8 +2453,8 @@ static int ext4_da_writepages(struct address_space *mapping,
24382453
needed_blocks = ext4_da_writepages_trans_blocks(inode);
24392454

24402455
/* start a new transaction */
2441-
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2442-
needed_blocks);
2456+
handle = ext4_journal_start_with_reserve(inode,
2457+
EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
24432458
if (IS_ERR(handle)) {
24442459
ret = PTR_ERR(handle);
24452460
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
31203135
* for non AIO case, since the IO is already
31213136
* completed, we could do the conversion right here
31223137
*/
3123-
err = ext4_convert_unwritten_extents(inode,
3138+
err = ext4_convert_unwritten_extents(NULL, inode,
31243139
offset, ret);
31253140
if (err < 0)
31263141
ret = err;

fs/ext4/page-io.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
6666
{
6767
BUG_ON(!list_empty(&io_end->list));
6868
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
69+
WARN_ON(io_end->handle);
6970

7071
if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
7172
wake_up_all(ext4_ioend_wq(io_end->inode));
@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
9293
struct inode *inode = io->inode;
9394
loff_t offset = io->offset;
9495
ssize_t size = io->size;
96+
handle_t *handle = io->handle;
9597
int ret = 0;
9698

9799
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
98100
"list->prev 0x%p\n",
99101
io, inode->i_ino, io->list.next, io->list.prev);
100102

101-
ret = ext4_convert_unwritten_extents(inode, offset, size);
103+
io->handle = NULL; /* Following call will use up the handle */
104+
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
102105
if (ret < 0) {
103106
ext4_msg(inode->i_sb, KERN_EMERG,
104107
"failed to convert unwritten extents to written "
@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
228231

229232
if (atomic_dec_and_test(&io_end->count)) {
230233
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
231-
err = ext4_convert_unwritten_extents(io_end->inode,
232-
io_end->offset, io_end->size);
234+
err = ext4_convert_unwritten_extents(io_end->handle,
235+
io_end->inode, io_end->offset,
236+
io_end->size);
237+
io_end->handle = NULL;
233238
ext4_clear_io_unwritten_flag(io_end);
234239
}
235240
ext4_release_io_end(io_end);

0 commit comments

Comments
 (0)