
Commit edbe83a

md/raid5: allow the stripe_cache to grow and shrink.
The default setting of 256 stripe_heads is probably much too small for
many configurations.  So it is best to make it auto-configure.

Shrinking the cache under memory pressure is easy.  The only
interesting part here is that we put a fairly high cost ('seeks') on
shrinking the cache, as the cost is greater than just having to read
more data: it reduces parallelism.

Growing the cache on demand needs to be done carefully.  If we allow
fast growth, that can upset memory balance as lots of dirty memory can
quickly turn into lots of memory queued in the stripe_cache.  It is
important for the raid5 block device to appear congested to allow
write-throttling to work.

So we only add stripes slowly.  We set a flag when an allocation fails
because all stripes are in use, allocate at a convenient time when that
flag is set, and don't allow it to be set again until at least one
stripe_head has been released for re-use.

This means that a spurt of requests will only cause one stripe_head to
be allocated, but a steady stream of requests will slowly increase the
cache size - until memory pressure puts it back again.  It could take
hours to reach a steady state.

The value written to, and displayed in, stripe_cache_size is used as a
minimum.  The cache can grow above this and shrink back down to it.
The actual size is not directly visible, though it can be deduced to
some extent by watching stripe_cache_active.

Signed-off-by: NeilBrown <[email protected]>
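To make the two-flag protocol concrete, here is a minimal userspace
sketch of the slow-growth logic described above.  It is an illustration
only, not kernel code: the booleans stand in for the R5_ALLOC_MORE and
R5_DID_ALLOC bits in conf->cache_state, daemon_pass() stands in for one
raid5d wakeup, and the stripe counts are invented.

/* Standalone model of the throttled growth protocol (illustration only). */
#include <stdbool.h>
#include <stdio.h>

static bool alloc_more;        /* models the R5_ALLOC_MORE bit */
static bool did_alloc;         /* models the R5_DID_ALLOC bit  */
static int cache_size = 256;   /* models conf->max_nr_stripes  */

/* A request found every stripe in use: ask for growth, but at most
 * once per release cycle. */
static void allocation_failed(void)
{
    if (!did_alloc)
        alloc_more = true;
}

/* One daemon wakeup: grow by a single stripe when asked, then block
 * further growth until a stripe_head is released. */
static void daemon_pass(void)
{
    if (alloc_more) {
        alloc_more = false;
        cache_size++;      /* models grow_one_stripe() */
        did_alloc = true;  /* set even if the allocation failed */
    }
}

/* A stripe_head went back on the free list: growth may be requested
 * again. */
static void stripe_released(void)
{
    did_alloc = false;
}

int main(void)
{
    int i;

    /* A spurt of failed requests grows the cache by only one stripe. */
    for (i = 0; i < 1000; i++)
        allocation_failed();
    daemon_pass();
    printf("after spurt: %d stripes\n", cache_size);   /* 257 */

    /* A steady stream (fail, grow, release, ...) grows it slowly. */
    stripe_released();
    for (i = 0; i < 10; i++) {
        allocation_failed();
        daemon_pass();
        stripe_released();
    }
    printf("after steady stream: %d stripes\n", cache_size); /* 267 */
    return 0;
}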
1 parent 5423399 commit edbe83a

File tree

2 files changed: 71 additions, 8 deletions

drivers/md/raid5.c

Lines changed: 61 additions & 7 deletions
@@ -672,8 +672,13 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                     *(conf->hash_locks + hash));
         sh = __find_stripe(conf, sector, conf->generation - previous);
         if (!sh) {
-            if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+            if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
                 sh = get_free_stripe(conf, hash);
+                if (!sh && llist_empty(&conf->released_stripes) &&
+                    !test_bit(R5_DID_ALLOC, &conf->cache_state))
+                    set_bit(R5_ALLOC_MORE,
+                        &conf->cache_state);
+            }
             if (noblock && sh == NULL)
                 break;
             if (!sh) {
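The guard above asks for growth only when all three conditions hold:
get_free_stripe() returned nothing, the released_stripes llist is empty
(so no stripe_head is about to be recycled), and R5_DID_ALLOC is clear,
i.e. any previously grown stripe has since been followed by at least
one release.  This is the "don't allow it to be set again" rule from
the commit message.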
@@ -5761,6 +5766,8 @@ static void raid5d(struct md_thread *thread)
         int batch_size, released;

         released = release_stripe_list(conf, conf->temp_inactive_list);
+        if (released)
+            clear_bit(R5_DID_ALLOC, &conf->cache_state);

         if (
             !list_empty(&conf->bitmap_list)) {
@@ -5799,6 +5806,13 @@ static void raid5d(struct md_thread *thread)
     pr_debug("%d stripes handled\n", handled);

     spin_unlock_irq(&conf->device_lock);
+    if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+        grow_one_stripe(conf, __GFP_NOWARN);
+        /* Set flag even if allocation failed.  This helps
+         * slow down allocation requests when mem is short
+         */
+        set_bit(R5_DID_ALLOC, &conf->cache_state);
+    }

     async_tx_issue_pending_all();
     blk_finish_plug(&plug);
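Because R5_ALLOC_MORE is consumed with test_and_clear_bit() once per
pass, the cache grows by at most one stripe_head per raid5d wakeup.
__GFP_NOWARN is passed since allocation failure is expected and
tolerated under memory pressure; as the comment notes, R5_DID_ALLOC is
set even then, which further slows down growth requests when memory is
short.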
@@ -5814,7 +5828,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
     spin_lock(&mddev->lock);
     conf = mddev->private;
     if (conf)
-        ret = sprintf(page, "%d\n", conf->max_nr_stripes);
+        ret = sprintf(page, "%d\n", conf->min_nr_stripes);
     spin_unlock(&mddev->lock);
     return ret;
 }
@@ -5828,10 +5842,12 @@ raid5_set_cache_size(struct mddev *mddev, int size)
     if (size <= 16 || size > 32768)
         return -EINVAL;

+    conf->min_nr_stripes = size;
     while (size < conf->max_nr_stripes &&
            drop_one_stripe(conf))
         ;

+
     err = md_allow_write(mddev);
     if (err)
         return err;
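Note the changed semantics of the sysfs knob: the written value is
recorded as conf->min_nr_stripes, and the loop only shrinks the cache
down toward that value when the cache is currently above it.  The
stored value is now a floor rather than the exact cache size, which is
also why raid5_show_stripe_cache_size() above reports min_nr_stripes;
the actual size can only be inferred by watching stripe_cache_active.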
@@ -5947,7 +5963,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
     conf = mddev->private;
     if (!conf)
         err = -ENODEV;
-    else if (new > conf->max_nr_stripes)
+    else if (new > conf->min_nr_stripes)
         err = -EINVAL;
     else
         conf->bypass_threshold = new;
@@ -6228,6 +6244,8 @@ static void raid5_free_percpu(struct r5conf *conf)

 static void free_conf(struct r5conf *conf)
 {
+    if (conf->shrinker.seeks)
+        unregister_shrinker(&conf->shrinker);
     free_thread_groups(conf);
     shrink_stripes(conf);
     raid5_free_percpu(conf);
@@ -6295,6 +6313,30 @@ static int raid5_alloc_percpu(struct r5conf *conf)
     return err;
 }

+static unsigned long raid5_cache_scan(struct shrinker *shrink,
+                      struct shrink_control *sc)
+{
+    struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+    int ret = 0;
+    while (ret < sc->nr_to_scan) {
+        if (drop_one_stripe(conf) == 0)
+            return SHRINK_STOP;
+        ret++;
+    }
+    return ret;
+}
+
+static unsigned long raid5_cache_count(struct shrinker *shrink,
+                       struct shrink_control *sc)
+{
+    struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+
+    if (conf->max_nr_stripes < conf->min_nr_stripes)
+        /* unlikely, but not impossible */
+        return 0;
+    return conf->max_nr_stripes - conf->min_nr_stripes;
+}
+
 static struct r5conf *setup_conf(struct mddev *mddev)
 {
     struct r5conf *conf;
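For readers unfamiliar with the shrinker API: under memory pressure the
VM calls count_objects() to learn how many objects are reclaimable,
then calls scan_objects() with a nr_to_scan budget, and SHRINK_STOP
tells it to give up on this cache.  raid5_cache_count() reports only
the excess above min_nr_stripes, so reclaim is never asked to eat into
the configured floor.  The sketch below models that call pattern in
userspace; the driver loop is invented for illustration (the real
vmscan batching is more involved), and it approximates drop_one_stripe()
failure as hitting the floor, whereas the real function fails when no
stripe_head is idle.

/* Userspace model of the count/scan shrinker contract (illustration). */
#include <stdio.h>

#define SHRINK_STOP (~0UL)

static int max_nr_stripes = 400;  /* current cache size */
static int min_nr_stripes = 256;  /* configured floor   */

/* Mirrors raid5_cache_count(): only the excess is reclaimable. */
static unsigned long cache_count(void)
{
    if (max_nr_stripes < min_nr_stripes)
        return 0;
    return max_nr_stripes - min_nr_stripes;
}

/* Mirrors raid5_cache_scan(): free up to nr_to_scan stripes and
 * report SHRINK_STOP as soon as a drop fails. */
static unsigned long cache_scan(unsigned long nr_to_scan)
{
    unsigned long ret = 0;

    while (ret < nr_to_scan) {
        if (max_nr_stripes <= min_nr_stripes)
            return SHRINK_STOP;  /* stand-in for drop_one_stripe() == 0 */
        max_nr_stripes--;        /* stand-in for a successful drop */
        ret++;
    }
    return ret;
}

int main(void)
{
    /* Invented loop standing in for the VM's shrinker core. */
    while (cache_count() > 0) {
        unsigned long freed = cache_scan(128);  /* shrinker.batch */

        if (freed == SHRINK_STOP)
            break;
        printf("freed %lu stripes, %d remain\n", freed, max_nr_stripes);
    }
    return 0;
}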
@@ -6445,17 +6487,29 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         conf->prev_algo = mddev->layout;
     }

-    memory = NR_STRIPES * (sizeof(struct stripe_head) +
+    conf->min_nr_stripes = NR_STRIPES;
+    memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
          max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
     atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
-    if (grow_stripes(conf, NR_STRIPES)) {
+    if (grow_stripes(conf, conf->min_nr_stripes)) {
         printk(KERN_ERR
                "md/raid:%s: couldn't allocate %dkB for buffers\n",
                mdname(mddev), memory);
         goto abort;
     } else
         printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
                mdname(mddev), memory);
+    /*
+     * Losing a stripe head costs more than the time to refill it,
+     * it reduces the queue depth and so can hurt throughput.
+     * So set it rather large, scaled by number of devices.
+     */
+    conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
+    conf->shrinker.scan_objects = raid5_cache_scan;
+    conf->shrinker.count_objects = raid5_cache_count;
+    conf->shrinker.batch = 128;
+    conf->shrinker.flags = 0;
+    register_shrinker(&conf->shrinker);

     sprintf(pers_name, "raid%d", mddev->new_level);
     conf->thread = md_register_thread(raid5d, mddev, pers_name);
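To put the seeks value in perspective: DEFAULT_SEEKS is 2 in
include/linux/shrinker.h, so for, say, a 6-device array
conf->shrinker.seeks = 2 * 6 * 4 = 48.  Since the shrinker framework
scales reclaim pressure down in proportion to seeks, stripe_heads on
that array are treated as roughly 24 times more expensive to recreate
than objects in a DEFAULT_SEEKS cache, and the VM will prefer to
reclaim from other caches first.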
@@ -7097,9 +7151,9 @@ static int check_stripe_cache(struct mddev *mddev)
      */
     struct r5conf *conf = mddev->private;
     if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
-        > conf->max_nr_stripes ||
+        > conf->min_nr_stripes ||
         ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
-        > conf->max_nr_stripes) {
+        > conf->min_nr_stripes) {
         printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
                mdname(mddev),
                ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)

drivers/md/raid5.h

Lines changed: 10 additions & 1 deletion
@@ -433,6 +433,7 @@ struct r5conf {
     int         max_degraded;
     int         raid_disks;
     int         max_nr_stripes;
+    int         min_nr_stripes;

     /* reshape_progress is the leading edge of a 'reshape'
      * It has value MaxSector when no reshape is happening
@@ -513,7 +514,15 @@ struct r5conf {
 #define R5_INACTIVE_BLOCKED 1   /* release of inactive stripes blocked,
                                  * waiting for 25% to be free
                                  */
-
+#define R5_ALLOC_MORE       2   /* It might help to allocate another
+                                 * stripe.
+                                 */
+#define R5_DID_ALLOC        4   /* A stripe was allocated, don't allocate
+                                 * more until at least one has been
+                                 * released.  This avoids flooding
+                                 * the cache.
+                                 */
+    struct shrinker     shrinker;
     int         pool_size; /* number of disks in stripeheads in pool */
     spinlock_t      device_lock;
     struct disk_info    *disks;
