Skip to content

Commit 9b06cce

Browse files
tytsogregkh
authored andcommitted
ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()
commit c755e25 upstream. The xattr_sem deadlock problems fixed in commit 2e81a4e: "ext4: avoid deadlock when expanding inode size" didn't include the use of xattr_sem in fs/ext4/inline.c. With the addition of project quota which added a new extra inode field, this exposed deadlocks in the inline_data code similar to the ones fixed by 2e81a4e. The deadlock can be reproduced via: dmesg -n 7 mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768 mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc mkdir /vdc/a umount /vdc mount -t ext4 /dev/vdc /vdc echo foo > /vdc/a/foo and looks like this: [ 11.158815] [ 11.160276] ============================================= [ 11.161960] [ INFO: possible recursive locking detected ] [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf hardkernel#160 Tainted: G W [ 11.161960] --------------------------------------------- [ 11.161960] bash/2519 is trying to acquire lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] [ 11.161960] but task is already holding lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] other info that might help us debug this: [ 11.161960] Possible unsafe locking scenario: [ 11.161960] [ 11.161960] CPU0 [ 11.161960] ---- [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] [ 11.161960] *** DEADLOCK *** [ 11.161960] [ 11.161960] May be due to missing lock nesting notation [ 11.161960] [ 11.161960] 4 locks held by bash/2519: [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e [ 11.161960] hardkernel#1: (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a [ 11.161960] hardkernel#2: (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622 [ 11.161960] hardkernel#3: (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] stack backtrace: [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf hardkernel#160 [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 [ 11.161960] Call Trace: [ 11.161960] dump_stack+0x72/0xa3 [ 11.161960] __lock_acquire+0xb7c/0xcb9 [ 11.161960] ? kvm_clock_read+0x1f/0x29 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] lock_acquire+0x106/0x18a [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] down_write+0x39/0x72 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ? _raw_read_unlock+0x22/0x2c [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262 [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60 [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_try_add_inline_entry+0x69/0x152 [ 11.161960] ext4_add_entry+0xa3/0x848 [ 11.161960] ? __brelse+0x14/0x2f [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f [ 11.161960] ext4_add_nondir+0x17/0x5b [ 11.161960] ext4_create+0xcf/0x133 [ 11.161960] ? ext4_mknod+0x12f/0x12f [ 11.161960] lookup_open+0x39e/0x3fb [ 11.161960] ? __wake_up+0x1a/0x40 [ 11.161960] ? lock_acquire+0x11e/0x18a [ 11.161960] path_openat+0x35c/0x67a [ 11.161960] ? sched_clock_cpu+0xd7/0xf2 [ 11.161960] do_filp_open+0x36/0x7c [ 11.161960] ? _raw_spin_unlock+0x22/0x2c [ 11.161960] ? __alloc_fd+0x169/0x173 [ 11.161960] do_sys_open+0x59/0xcc [ 11.161960] SyS_open+0x1d/0x1f [ 11.161960] do_int80_syscall_32+0x4f/0x61 [ 11.161960] entry_INT80_32+0x2f/0x2f [ 11.161960] EIP: 0xb76ad469 [ 11.161960] EFLAGS: 00000286 CPU: 0 [ 11.161960] EAX: ffffffda EBX: 08168ac ECX: 00008241 EDX: 000001b6 [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0 [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Cc: [email protected] # 3.10 (requires 2e81a4e as a prereq) Reported-by: George Spelvin <[email protected]> Signed-off-by: Theodore Ts'o <[email protected]> Signed-off-by: Nathan Chancellor <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent b9b98c2 commit 9b06cce

File tree

3 files changed

+74
-54
lines changed

3 files changed

+74
-54
lines changed

fs/ext4/inline.c

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
376376
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
377377
unsigned int len)
378378
{
379-
int ret, size;
379+
int ret, size, no_expand;
380380
struct ext4_inode_info *ei = EXT4_I(inode);
381381

382382
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
@@ -386,15 +386,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
386386
if (size < len)
387387
return -ENOSPC;
388388

389-
down_write(&EXT4_I(inode)->xattr_sem);
389+
ext4_write_lock_xattr(inode, &no_expand);
390390

391391
if (ei->i_inline_off)
392392
ret = ext4_update_inline_data(handle, inode, len);
393393
else
394394
ret = ext4_create_inline_data(handle, inode, len);
395395

396-
up_write(&EXT4_I(inode)->xattr_sem);
397-
396+
ext4_write_unlock_xattr(inode, &no_expand);
398397
return ret;
399398
}
400399

@@ -523,7 +522,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
523522
struct inode *inode,
524523
unsigned flags)
525524
{
526-
int ret, needed_blocks;
525+
int ret, needed_blocks, no_expand;
527526
handle_t *handle = NULL;
528527
int retries = 0, sem_held = 0;
529528
struct page *page = NULL;
@@ -563,7 +562,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
563562
goto out;
564563
}
565564

566-
down_write(&EXT4_I(inode)->xattr_sem);
565+
ext4_write_lock_xattr(inode, &no_expand);
567566
sem_held = 1;
568567
/* If some one has already done this for us, just exit. */
569568
if (!ext4_has_inline_data(inode)) {
@@ -599,7 +598,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
599598
page_cache_release(page);
600599
page = NULL;
601600
ext4_orphan_add(handle, inode);
602-
up_write(&EXT4_I(inode)->xattr_sem);
601+
ext4_write_unlock_xattr(inode, &no_expand);
603602
sem_held = 0;
604603
ext4_journal_stop(handle);
605604
handle = NULL;
@@ -625,7 +624,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
625624
page_cache_release(page);
626625
}
627626
if (sem_held)
628-
up_write(&EXT4_I(inode)->xattr_sem);
627+
ext4_write_unlock_xattr(inode, &no_expand);
629628
if (handle)
630629
ext4_journal_stop(handle);
631630
brelse(iloc.bh);
@@ -718,7 +717,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
718717
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
719718
unsigned copied, struct page *page)
720719
{
721-
int ret;
720+
int ret, no_expand;
722721
void *kaddr;
723722
struct ext4_iloc iloc;
724723

@@ -736,7 +735,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
736735
goto out;
737736
}
738737

739-
down_write(&EXT4_I(inode)->xattr_sem);
738+
ext4_write_lock_xattr(inode, &no_expand);
740739
BUG_ON(!ext4_has_inline_data(inode));
741740

742741
kaddr = kmap_atomic(page);
@@ -746,7 +745,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
746745
/* clear page dirty so that writepages wouldn't work for us. */
747746
ClearPageDirty(page);
748747

749-
up_write(&EXT4_I(inode)->xattr_sem);
748+
ext4_write_unlock_xattr(inode, &no_expand);
750749
brelse(iloc.bh);
751750
out:
752751
return copied;
@@ -757,7 +756,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
757756
unsigned len,
758757
struct page *page)
759758
{
760-
int ret;
759+
int ret, no_expand;
761760
void *kaddr;
762761
struct ext4_iloc iloc;
763762

@@ -767,11 +766,11 @@ ext4_journalled_write_inline_data(struct inode *inode,
767766
return NULL;
768767
}
769768

770-
down_write(&EXT4_I(inode)->xattr_sem);
769+
ext4_write_lock_xattr(inode, &no_expand);
771770
kaddr = kmap_atomic(page);
772771
ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
773772
kunmap_atomic(kaddr);
774-
up_write(&EXT4_I(inode)->xattr_sem);
773+
ext4_write_unlock_xattr(inode, &no_expand);
775774

776775
return iloc.bh;
777776
}
@@ -1255,7 +1254,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
12551254
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
12561255
struct dentry *dentry, struct inode *inode)
12571256
{
1258-
int ret, inline_size;
1257+
int ret, inline_size, no_expand;
12591258
void *inline_start;
12601259
struct ext4_iloc iloc;
12611260
struct inode *dir = d_inode(dentry->d_parent);
@@ -1264,7 +1263,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
12641263
if (ret)
12651264
return ret;
12661265

1267-
down_write(&EXT4_I(dir)->xattr_sem);
1266+
ext4_write_lock_xattr(dir, &no_expand);
12681267
if (!ext4_has_inline_data(dir))
12691268
goto out;
12701269

@@ -1310,7 +1309,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
13101309

13111310
out:
13121311
ext4_mark_inode_dirty(handle, dir);
1313-
up_write(&EXT4_I(dir)->xattr_sem);
1312+
ext4_write_unlock_xattr(dir, &no_expand);
13141313
brelse(iloc.bh);
13151314
return ret;
13161315
}
@@ -1670,15 +1669,15 @@ int ext4_delete_inline_entry(handle_t *handle,
16701669
struct buffer_head *bh,
16711670
int *has_inline_data)
16721671
{
1673-
int err, inline_size;
1672+
int err, inline_size, no_expand;
16741673
struct ext4_iloc iloc;
16751674
void *inline_start;
16761675

16771676
err = ext4_get_inode_loc(dir, &iloc);
16781677
if (err)
16791678
return err;
16801679

1681-
down_write(&EXT4_I(dir)->xattr_sem);
1680+
ext4_write_lock_xattr(dir, &no_expand);
16821681
if (!ext4_has_inline_data(dir)) {
16831682
*has_inline_data = 0;
16841683
goto out;
@@ -1713,7 +1712,7 @@ int ext4_delete_inline_entry(handle_t *handle,
17131712

17141713
ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
17151714
out:
1716-
up_write(&EXT4_I(dir)->xattr_sem);
1715+
ext4_write_unlock_xattr(dir, &no_expand);
17171716
brelse(iloc.bh);
17181717
if (err != -ENOENT)
17191718
ext4_std_error(dir->i_sb, err);
@@ -1812,11 +1811,11 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data)
18121811

18131812
int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
18141813
{
1815-
int ret;
1814+
int ret, no_expand;
18161815

1817-
down_write(&EXT4_I(inode)->xattr_sem);
1816+
ext4_write_lock_xattr(inode, &no_expand);
18181817
ret = ext4_destroy_inline_data_nolock(handle, inode);
1819-
up_write(&EXT4_I(inode)->xattr_sem);
1818+
ext4_write_unlock_xattr(inode, &no_expand);
18201819

18211820
return ret;
18221821
}
@@ -1901,7 +1900,7 @@ int ext4_try_to_evict_inline_data(handle_t *handle,
19011900
void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
19021901
{
19031902
handle_t *handle;
1904-
int inline_size, value_len, needed_blocks;
1903+
int inline_size, value_len, needed_blocks, no_expand;
19051904
size_t i_size;
19061905
void *value = NULL;
19071906
struct ext4_xattr_ibody_find is = {
@@ -1918,7 +1917,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
19181917
if (IS_ERR(handle))
19191918
return;
19201919

1921-
down_write(&EXT4_I(inode)->xattr_sem);
1920+
ext4_write_lock_xattr(inode, &no_expand);
19221921
if (!ext4_has_inline_data(inode)) {
19231922
*has_inline = 0;
19241923
ext4_journal_stop(handle);
@@ -1976,7 +1975,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
19761975
up_write(&EXT4_I(inode)->i_data_sem);
19771976
out:
19781977
brelse(is.iloc.bh);
1979-
up_write(&EXT4_I(inode)->xattr_sem);
1978+
ext4_write_unlock_xattr(inode, &no_expand);
19801979
kfree(value);
19811980
if (inode->i_nlink)
19821981
ext4_orphan_del(handle, inode);
@@ -1992,7 +1991,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
19921991

19931992
int ext4_convert_inline_data(struct inode *inode)
19941993
{
1995-
int error, needed_blocks;
1994+
int error, needed_blocks, no_expand;
19961995
handle_t *handle;
19971996
struct ext4_iloc iloc;
19981997

@@ -2014,15 +2013,10 @@ int ext4_convert_inline_data(struct inode *inode)
20142013
goto out_free;
20152014
}
20162015

2017-
down_write(&EXT4_I(inode)->xattr_sem);
2018-
if (!ext4_has_inline_data(inode)) {
2019-
up_write(&EXT4_I(inode)->xattr_sem);
2020-
goto out;
2021-
}
2022-
2023-
error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
2024-
up_write(&EXT4_I(inode)->xattr_sem);
2025-
out:
2016+
ext4_write_lock_xattr(inode, &no_expand);
2017+
if (ext4_has_inline_data(inode))
2018+
error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
2019+
ext4_write_unlock_xattr(inode, &no_expand);
20262020
ext4_journal_stop(handle);
20272021
out_free:
20282022
brelse(iloc.bh);

fs/ext4/xattr.c

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,16 +1117,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
11171117
struct ext4_xattr_block_find bs = {
11181118
.s = { .not_found = -ENODATA, },
11191119
};
1120-
unsigned long no_expand;
1120+
int no_expand;
11211121
int error;
11221122

11231123
if (!name)
11241124
return -EINVAL;
11251125
if (strlen(name) > 255)
11261126
return -ERANGE;
1127-
down_write(&EXT4_I(inode)->xattr_sem);
1128-
no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
1129-
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
1127+
ext4_write_lock_xattr(inode, &no_expand);
11301128

11311129
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
11321130
if (error)
@@ -1187,7 +1185,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
11871185
ext4_xattr_update_super_block(handle, inode->i_sb);
11881186
inode->i_ctime = ext4_current_time(inode);
11891187
if (!value)
1190-
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1188+
no_expand = 0;
11911189
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
11921190
/*
11931191
* The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1201,9 +1199,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
12011199
cleanup:
12021200
brelse(is.iloc.bh);
12031201
brelse(bs.bh);
1204-
if (no_expand == 0)
1205-
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1206-
up_write(&EXT4_I(inode)->xattr_sem);
1202+
ext4_write_unlock_xattr(inode, &no_expand);
12071203
return error;
12081204
}
12091205

@@ -1287,12 +1283,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
12871283
int error = 0, tried_min_extra_isize = 0;
12881284
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
12891285
int isize_diff; /* How much do we need to grow i_extra_isize */
1286+
int no_expand;
1287+
1288+
if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
1289+
return 0;
12901290

1291-
down_write(&EXT4_I(inode)->xattr_sem);
1292-
/*
1293-
* Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
1294-
*/
1295-
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
12961291
retry:
12971292
isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
12981293
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
@@ -1486,8 +1481,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
14861481
}
14871482
brelse(bh);
14881483
out:
1489-
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1490-
up_write(&EXT4_I(inode)->xattr_sem);
1484+
ext4_write_unlock_xattr(inode, &no_expand);
14911485
return 0;
14921486

14931487
cleanup:
@@ -1499,10 +1493,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
14991493
kfree(bs);
15001494
brelse(bh);
15011495
/*
1502-
* We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
1503-
* size expansion failed.
1496+
* Inode size expansion failed; don't try again
15041497
*/
1505-
up_write(&EXT4_I(inode)->xattr_sem);
1498+
no_expand = 1;
1499+
ext4_write_unlock_xattr(inode, &no_expand);
15061500
return error;
15071501
}
15081502

fs/ext4/xattr.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,38 @@ extern const struct xattr_handler ext4_xattr_security_handler;
101101

102102
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
103103

104+
/*
105+
* The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
106+
* The first is to signal that there the inline xattrs and data are
107+
* taking up so much space that we might as well not keep trying to
108+
* expand it. The second is that xattr_sem is taken for writing, so
109+
* we shouldn't try to recurse into the inode expansion. For this
110+
* second case, we need to make sure that we take save and restore the
111+
* NO_EXPAND state flag appropriately.
112+
*/
113+
static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
114+
{
115+
down_write(&EXT4_I(inode)->xattr_sem);
116+
*save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
117+
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
118+
}
119+
120+
static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
121+
{
122+
if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
123+
return 0;
124+
*save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
125+
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
126+
return 1;
127+
}
128+
129+
static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
130+
{
131+
if (*save == 0)
132+
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
133+
up_write(&EXT4_I(inode)->xattr_sem);
134+
}
135+
104136
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
105137

106138
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);

0 commit comments

Comments
 (0)