
Commit 075a61d

mina authored and torvalds committed
hugetlb_cgroup: add accounting for shared mappings
For shared mappings, the pointer to the hugetlb_cgroup to uncharge lives
in the resv_map entries, in file_region->reservation_counter.

After a call to region_chg, we charge the appropriate hugetlb_cgroup, and
if successful, we pass on the hugetlb_cgroup info to a follow-up
region_add call. When a file_region entry is added to the resv_map via
region_add, we put the pointer to that cgroup in
file_region->reservation_counter. If charging doesn't succeed, we report
the error to the caller, so that the kernel fails the reservation.

On region_del, which is when the hugetlb memory is unreserved, we also
uncharge the file_region->reservation_counter.

[[email protected]: forward declare struct file_region]

Signed-off-by: Mina Almasry <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Sandipan Das <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Shuah Khan <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0db9d74 commit 075a61d
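The flow in the message above boils down to charge (region_chg), commit (region_add), uncharge (region_del). As a rough userspace toy model of that lifecycle (our own simplified names and types, not kernel code or kernel APIs):

#include <stdio.h>

/* Stand-ins for the kernel structures; fields mirror the patch. */
struct page_counter { long usage, limit; };

struct file_region {
	long from, to;                            /* [from, to) huge pages */
	struct page_counter *reservation_counter; /* whom to uncharge */
};

/* region_chg step: charge the cgroup first; failure fails the reservation */
static int charge(struct page_counter *c, long pages)
{
	if (c->usage + pages > c->limit)
		return -1;
	c->usage += pages;
	return 0;
}

/* region_add step: the new resv_map entry remembers its counter */
static void commit_region(struct file_region *rg, long from, long to,
			  struct page_counter *c)
{
	rg->from = from;
	rg->to = to;
	rg->reservation_counter = c;
}

/* region_del step: uncharge whatever the entry recorded */
static void uncharge(struct file_region *rg)
{
	rg->reservation_counter->usage -= rg->to - rg->from;
}

int main(void)
{
	struct page_counter rsvd = { 0, 8 };
	struct file_region rg;

	if (charge(&rsvd, 4) == 0) {
		commit_region(&rg, 0, 4, &rsvd);
		printf("reserved, usage=%ld\n", rsvd.usage);   /* 4 */
		uncharge(&rg);                                 /* unreserve */
		printf("unreserved, usage=%ld\n", rsvd.usage); /* 0 */
	}
	return 0;
}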

File tree

4 files changed: +155 -54 lines changed


include/linux/hugetlb.h

Lines changed: 35 additions & 0 deletions

@@ -57,6 +57,41 @@ struct resv_map {
 	struct cgroup_subsys_state *css;
 #endif
 };
+
+/*
+ * Region tracking -- allows tracking of reservations and instantiated pages
+ * across the pages in a mapping.
+ *
+ * The region data structures are embedded into a resv_map and protected
+ * by a resv_map's lock. The set of regions within the resv_map represent
+ * reservations for huge pages, or huge pages that have already been
+ * instantiated within the map. The from and to elements are huge page
+ * indicies into the associated mapping. from indicates the starting index
+ * of the region. to represents the first index past the end of the region.
+ *
+ * For example, a file region structure with from == 0 and to == 4 represents
+ * four huge pages in a mapping. It is important to note that the to element
+ * represents the first element past the end of the region. This is used in
+ * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
+ *
+ * Interval notation of the form [from, to) will be used to indicate that
+ * the endpoint from is inclusive and to is exclusive.
+ */
+struct file_region {
+	struct list_head link;
+	long from;
+	long to;
+#ifdef CONFIG_CGROUP_HUGETLB
+	/*
+	 * On shared mappings, each reserved region appears as a struct
+	 * file_region in resv_map. These fields hold the info needed to
+	 * uncharge each reservation.
+	 */
+	struct page_counter *reservation_counter;
+	struct cgroup_subsys_state *css;
+#endif
+};
+
 extern struct resv_map *resv_map_alloc(void);
 void resv_map_release(struct kref *ref);
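Given the [from, to) convention documented above, counting how many pages of a requested range are not yet covered by existing regions is plain interval arithmetic. A small userspace sketch of that counting step (simplified: an array instead of the kernel's linked list, all names ours):

#include <stdio.h>

/* Simplified stand-in for struct file_region: [from, to) in huge pages. */
struct region { long from, to; };

/* Count pages in [f, t) not covered by a sorted, non-overlapping array
 * of regions -- the same arithmetic region_chg performs on the resv_map. */
static long uncovered(const struct region *rg, int n, long f, long t)
{
	long missing = 0, next = f;   /* first index not yet accounted for */

	for (int i = 0; i < n && next < t; i++) {
		if (rg[i].to <= next)
			continue;     /* region entirely before the gap */
		if (rg[i].from >= t)
			break;        /* region entirely after the range */
		if (rg[i].from > next)
			missing += rg[i].from - next; /* gap before region */
		next = rg[i].to;
	}
	if (next < t)
		missing += t - next;  /* tail gap after the last region */
	return missing;
}

int main(void)
{
	struct region map[] = { { 1, 3 }, { 5, 6 } };

	/* [0,8) minus [1,3) and [5,6) leaves indices 0, 3, 4, 6, 7. */
	printf("%ld\n", uncovered(map, 2, 0, 8));  /* prints 5 */
	return 0;
}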

include/linux/hugetlb_cgroup.h

Lines changed: 11 additions & 0 deletions

@@ -19,6 +19,7 @@

 struct hugetlb_cgroup;
 struct resv_map;
+struct file_region;

 /*
  * Minimum page order trackable by hugetlb cgroup.
@@ -135,11 +136,21 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
 					    unsigned long start,
 					    unsigned long end);

+extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
+						struct file_region *rg,
+						unsigned long nr_pages);
+
 extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);

 #else
+static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
+						       struct file_region *rg,
+						       unsigned long nr_pages)
+{
+}
+
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
 {
 	return NULL;
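The definition of hugetlb_cgroup_uncharge_file_region lands in the fourth changed file, presumably mm/hugetlb_cgroup.c, which this page does not show. A minimal sketch of what such a helper has to do, given the fields added to struct file_region above; an assumed shape written against kernel headers, not the commit's verbatim code:

/* Sketch only: uncharge the cgroup recorded in a file_region when
 * nr_pages huge pages in it are unreserved. Assumes the resv_map
 * caches pages_per_hpage so the huge-page count can be converted to
 * base pages for the page_counter. */
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		css_put(rg->css);
	}
}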

mm/hugetlb.c

Lines changed: 94 additions & 54 deletions

@@ -220,31 +220,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
 	return subpool_inode(file_inode(vma->vm_file));
 }

-/*
- * Region tracking -- allows tracking of reservations and instantiated pages
- * across the pages in a mapping.
- *
- * The region data structures are embedded into a resv_map and protected
- * by a resv_map's lock. The set of regions within the resv_map represent
- * reservations for huge pages, or huge pages that have already been
- * instantiated within the map. The from and to elements are huge page
- * indicies into the associated mapping. from indicates the starting index
- * of the region. to represents the first index past the end of the region.
- *
- * For example, a file region structure with from == 0 and to == 4 represents
- * four huge pages in a mapping. It is important to note that the to element
- * represents the first element past the end of the region. This is used in
- * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
- *
- * Interval notation of the form [from, to) will be used to indicate that
- * the endpoint from is inclusive and to is exclusive.
- */
-struct file_region {
-	struct list_head link;
-	long from;
-	long to;
-};
-
 /* Helper that removes a struct file_region from the resv_map cache and returns
  * it for use.
  */
@@ -266,14 +241,51 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to)
 	return nrg;
 }

+static void copy_hugetlb_cgroup_uncharge_info(struct file_region *nrg,
+					      struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	nrg->reservation_counter = rg->reservation_counter;
+	nrg->css = rg->css;
+	if (rg->css)
+		css_get(rg->css);
+#endif
+}
+
+/* Helper that records hugetlb_cgroup uncharge info. */
+static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
+						struct hstate *h,
+						struct resv_map *resv,
+						struct file_region *nrg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	if (h_cg) {
+		nrg->reservation_counter =
+			&h_cg->rsvd_hugepage[hstate_index(h)];
+		nrg->css = &h_cg->css;
+		if (!resv->pages_per_hpage)
+			resv->pages_per_hpage = pages_per_huge_page(h);
+		/* pages_per_hpage should be the same for all entries in
+		 * a resv_map.
+		 */
+		VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
+	} else {
+		nrg->reservation_counter = NULL;
+		nrg->css = NULL;
+	}
+#endif
+}
+
 /* Must be called with resv->lock held. Calling this with count_only == true
  * will count the number of pages to be added but will not modify the linked
  * list. If regions_needed != NULL and count_only == true, then regions_needed
  * will indicate the number of file_regions needed in the cache to carry out to
  * add the regions for this range.
  */
 static long add_reservation_in_range(struct resv_map *resv, long f, long t,
-				     long *regions_needed, bool count_only)
+				     struct hugetlb_cgroup *h_cg,
+				     struct hstate *h, long *regions_needed,
+				     bool count_only)
 {
 	long add = 0;
 	struct list_head *head = &resv->regions;
@@ -312,6 +324,8 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		if (!count_only) {
 			nrg = get_file_region_entry_from_cache(
 				resv, last_accounted_offset, rg->from);
+			record_hugetlb_cgroup_uncharge_info(h_cg, h,
+							    resv, nrg);
 			list_add(&nrg->link, rg->link.prev);
 		} else if (regions_needed)
 			*regions_needed += 1;
@@ -328,6 +342,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	if (!count_only) {
 		nrg = get_file_region_entry_from_cache(
 			resv, last_accounted_offset, t);
+		record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
 		list_add(&nrg->link, rg->link.prev);
 	} else if (regions_needed)
 		*regions_needed += 1;
@@ -416,15 +431,17 @@ static int allocate_file_region_entries(struct resv_map *resv,
  * 1 page will only require at most 1 entry.
  */
 static long region_add(struct resv_map *resv, long f, long t,
-		       long in_regions_needed)
+		       long in_regions_needed, struct hstate *h,
+		       struct hugetlb_cgroup *h_cg)
 {
 	long add = 0, actual_regions_needed = 0;

 	spin_lock(&resv->lock);
 retry:

 	/* Count how many regions are actually needed to execute this add. */
-	add_reservation_in_range(resv, f, t, &actual_regions_needed, true);
+	add_reservation_in_range(resv, f, t, NULL, NULL, &actual_regions_needed,
+				 true);

 	/*
 	 * Check for sufficient descriptors in the cache to accommodate
@@ -452,7 +469,7 @@ static long region_add(struct resv_map *resv, long f, long t,
 		goto retry;
 	}

-	add = add_reservation_in_range(resv, f, t, NULL, false);
+	add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false);

 	resv->adds_in_progress -= in_regions_needed;

@@ -489,7 +506,8 @@ static long region_chg(struct resv_map *resv, long f, long t,
 	spin_lock(&resv->lock);

 	/* Count how many hugepages in this range are NOT respresented. */
-	chg = add_reservation_in_range(resv, f, t, out_regions_needed, true);
+	chg = add_reservation_in_range(resv, f, t, NULL, NULL,
+				       out_regions_needed, true);

 	if (*out_regions_needed == 0)
 		*out_regions_needed = 1;
@@ -589,18 +607,26 @@ static long region_del(struct resv_map *resv, long f, long t)
 			/* New entry for end of split region */
 			nrg->from = t;
 			nrg->to = rg->to;
+
+			copy_hugetlb_cgroup_uncharge_info(nrg, rg);
+
 			INIT_LIST_HEAD(&nrg->link);

 			/* Original entry is trimmed */
 			rg->to = f;

+			hugetlb_cgroup_uncharge_file_region(
+				resv, rg, nrg->to - nrg->from);
+
 			list_add(&nrg->link, &rg->link);
 			nrg = NULL;
 			break;
 		}

 		if (f <= rg->from && t >= rg->to) { /* Remove entire region */
 			del += rg->to - rg->from;
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - rg->from);
 			list_del(&rg->link);
 			kfree(rg);
 			continue;
@@ -609,9 +635,15 @@ static long region_del(struct resv_map *resv, long f, long t)
 		if (f <= rg->from) {	/* Trim beginning of region */
 			del += t - rg->from;
 			rg->from = t;
+
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    t - rg->from);
 		} else {		/* Trim end of region */
 			del += rg->to - f;
 			rg->to = f;
+
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - f);
 		}
 	}

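The subtlest region_del case above is the middle punch: one region becomes two, and only the removed middle counts as deleted. The interval arithmetic, as a self-contained toy (ours; the kernel's list handling and uncharge plumbing are omitted):

#include <stdio.h>

struct region { long from, to; };

/* Model of region_del's split case: remove [f, t) from the middle of
 * *rg, producing a new trailing region *nrg. Returns pages deleted. */
static long split(struct region *rg, struct region *nrg, long f, long t)
{
	nrg->from = t;          /* new entry for end of split region */
	nrg->to = rg->to;
	rg->to = f;             /* original entry is trimmed */
	return t - f;
}

int main(void)
{
	struct region rg = { 0, 10 }, nrg;
	long del = split(&rg, &nrg, 4, 6);

	/* [0,10) minus [4,6) -> [0,4) and [6,10), 2 pages deleted */
	printf("[%ld,%ld) [%ld,%ld) del=%ld\n",
	       rg.from, rg.to, nrg.from, nrg.to, del);
	return 0;
}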

@@ -2124,7 +2156,7 @@ static long __vma_reservation_common(struct hstate *h,
 		VM_BUG_ON(dummy_out_regions_needed != 1);
 		break;
 	case VMA_COMMIT_RESV:
-		ret = region_add(resv, idx, idx + 1, 1);
+		ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 		/* region_add calls of range 1 should never fail. */
 		VM_BUG_ON(ret < 0);
 		break;
@@ -2134,7 +2166,7 @@
 		break;
 	case VMA_ADD_RESV:
 		if (vma->vm_flags & VM_MAYSHARE) {
-			ret = region_add(resv, idx, idx + 1, 1);
+			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 			/* region_add calls of range 1 should never fail. */
 			VM_BUG_ON(ret < 0);
 		} else {
@@ -4830,7 +4862,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	struct hstate *h = hstate_inode(inode);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
-	struct hugetlb_cgroup *h_cg;
+	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;

 	/* This should never happen */
@@ -4871,19 +4903,6 @@ int hugetlb_reserve_pages(struct inode *inode,

 		chg = to - from;

-		if (hugetlb_cgroup_charge_cgroup_rsvd(
-			    hstate_index(h), chg * pages_per_huge_page(h),
-			    &h_cg)) {
-			kref_put(&resv_map->refs, resv_map_release);
-			return -ENOMEM;
-		}
-
-		/*
-		 * Since this branch handles private mappings, we attach the
-		 * counter to uncharge for this reservation off resv_map.
-		 */
-		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
-
 		set_vma_resv_map(vma, resv_map);
 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
@@ -4893,6 +4912,21 @@ int hugetlb_reserve_pages(struct inode *inode,
 		goto out_err;
 	}

+	ret = hugetlb_cgroup_charge_cgroup_rsvd(
+		hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+
+	if (ret < 0) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+		/* For private mappings, the hugetlb_cgroup uncharge info hangs
+		 * of the resv_map.
+		 */
+		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
+	}
+
 	/*
 	 * There must be enough pages in the subpool for the mapping. If
 	 * the subpool has a minimum size, there may be some global
@@ -4901,7 +4935,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	gbl_reserve = hugepage_subpool_get_pages(spool, chg);
 	if (gbl_reserve < 0) {
 		ret = -ENOSPC;
-		goto out_err;
+		goto out_uncharge_cgroup;
 	}

 	/*
@@ -4910,9 +4944,7 @@
 	 */
 	ret = hugetlb_acct_memory(h, gbl_reserve);
 	if (ret < 0) {
-		/* put back original number of pages, chg */
-		(void)hugepage_subpool_put_pages(spool, chg);
-		goto out_err;
+		goto out_put_pages;
 	}

 	/*
@@ -4927,13 +4959,11 @@
 	 * else has to be done for private mappings here
 	 */
 	if (!vma || vma->vm_flags & VM_MAYSHARE) {
-		add = region_add(resv_map, from, to, regions_needed);
+		add = region_add(resv_map, from, to, regions_needed, h, h_cg);

 		if (unlikely(add < 0)) {
 			hugetlb_acct_memory(h, -gbl_reserve);
-			/* put back original number of pages, chg */
-			(void)hugepage_subpool_put_pages(spool, chg);
-			goto out_err;
+			goto out_put_pages;
 		} else if (unlikely(chg > add)) {
 			/*
 			 * pages in this range were added to the reserve
@@ -4944,12 +4974,22 @@
 			 */
 			long rsv_adjust;

+			hugetlb_cgroup_uncharge_cgroup_rsvd(
+				hstate_index(h),
+				(chg - add) * pages_per_huge_page(h), h_cg);
+
 			rsv_adjust = hugepage_subpool_put_pages(spool,
 								chg - add);
 			hugetlb_acct_memory(h, -rsv_adjust);
 		}
 	}
 	return 0;
+out_put_pages:
+	/* put back original number of pages, chg */
+	(void)hugepage_subpool_put_pages(spool, chg);
+out_uncharge_cgroup:
+	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
+					    chg * pages_per_huge_page(h), h_cg);
 out_err:
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		/* Only call region_abort if the region_chg succeeded but the
0 commit comments

Comments
 (0)