Skip to content

Commit 83c8266

Browse files
kdave authored and Josef Bacik committed
btrfs: try harder to allocate raid56 stripe cache
The stripe hash table is large, starting with allocation order 4 and can go as high as order 7 in case lock debugging is turned on and structure padding happens.

Observed mount failure:

mount: page allocation failure: order:7, mode:0x200050
Pid: 8234, comm: mount Tainted: G W 3.8.0-default+ #267
Call Trace:
[<ffffffff81114353>] warn_alloc_failed+0xf3/0x140
[<ffffffff811171d2>] ? __alloc_pages_direct_compact+0x92/0x250
[<ffffffff81117ac3>] __alloc_pages_nodemask+0x733/0x9d0
[<ffffffff81152878>] ? cache_alloc_refill+0x3f8/0x840
[<ffffffff811528bc>] cache_alloc_refill+0x43c/0x840
[<ffffffff811302eb>] ? is_kernel_percpu_address+0x4b/0x90
[<ffffffffa00a00ac>] ? btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs]
[<ffffffff811531d7>] kmem_cache_alloc_trace+0x247/0x270
[<ffffffffa00a00ac>] btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs]
[<ffffffffa003133f>] open_ctree+0xb2f/0x1f90 [btrfs]
[<ffffffff81397289>] ? string+0x49/0xe0
[<ffffffff813987b3>] ? vsnprintf+0x443/0x5d0
[<ffffffffa0007cb6>] btrfs_mount+0x526/0x600 [btrfs]
[<ffffffff8115127c>] ? cache_alloc_debugcheck_after+0x4c/0x200
[<ffffffff81162b90>] mount_fs+0x20/0xe0
[<ffffffff8117db26>] vfs_kern_mount+0x76/0x120
[<ffffffff811801b6>] do_mount+0x386/0x980
[<ffffffff8112a5cb>] ? strndup_user+0x5b/0x80
[<ffffffff81180840>] sys_mount+0x90/0xe0
[<ffffffff81962e99>] system_call_fastpath+0x16/0x1b

Signed-off-by: David Sterba <[email protected]>
Signed-off-by: Josef Bacik <[email protected]>
1 parent 88e081b commit 83c8266

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

fs/btrfs/disk-io.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2197,7 +2197,7 @@ int open_ctree(struct super_block *sb,
21972197

21982198
ret = btrfs_alloc_stripe_hash_table(fs_info);
21992199
if (ret) {
2200-
err = -ENOMEM;
2200+
err = ret;
22012201
goto fail_alloc;
22022202
}
22032203

fs/btrfs/raid56.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,25 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
188188
struct btrfs_stripe_hash *h;
189189
int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
190190
int i;
191+
int table_size;
191192

192193
if (info->stripe_hash_table)
193194
return 0;
194195

195-
table = kzalloc(sizeof(*table) + sizeof(*h) * num_entries, GFP_NOFS);
196-
if (!table)
197-
return -ENOMEM;
196+
/*
197+
* The table is large, starting with order 4 and can go as high as
198+
* order 7 in case lock debugging is turned on.
199+
*
200+
* Try harder to allocate and fallback to vmalloc to lower the chance
201+
* of a failing mount.
202+
*/
203+
table_size = sizeof(*table) + sizeof(*h) * num_entries;
204+
table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
205+
if (!table) {
206+
table = vzalloc(table_size);
207+
if (!table)
208+
return -ENOMEM;
209+
}
198210

199211
spin_lock_init(&table->cache_lock);
200212
INIT_LIST_HEAD(&table->stripe_cache);
@@ -209,8 +221,12 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
209221
}
210222

211223
x = cmpxchg(&info->stripe_hash_table, NULL, table);
212-
if (x)
213-
kfree(x);
224+
if (x) {
225+
if (is_vmalloc_addr(x))
226+
vfree(x);
227+
else
228+
kfree(x);
229+
}
214230
return 0;
215231
}
216232

@@ -420,7 +436,10 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
420436
if (!info->stripe_hash_table)
421437
return;
422438
btrfs_clear_rbio_cache(info);
423-
kfree(info->stripe_hash_table);
439+
if (is_vmalloc_addr(info->stripe_hash_table))
440+
vfree(info->stripe_hash_table);
441+
else
442+
kfree(info->stripe_hash_table);
424443
info->stripe_hash_table = NULL;
425444
}
426445

0 commit comments

Comments
 (0)