@@ -220,31 +220,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
 	return subpool_inode(file_inode(vma->vm_file));
 }
 
-/*
- * Region tracking -- allows tracking of reservations and instantiated pages
- * across the pages in a mapping.
- *
- * The region data structures are embedded into a resv_map and protected
- * by a resv_map's lock. The set of regions within the resv_map represent
- * reservations for huge pages, or huge pages that have already been
- * instantiated within the map. The from and to elements are huge page
- * indicies into the associated mapping. from indicates the starting index
- * of the region. to represents the first index past the end of the region.
- *
- * For example, a file region structure with from == 0 and to == 4 represents
- * four huge pages in a mapping. It is important to note that the to element
- * represents the first element past the end of the region. This is used in
- * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
- *
- * Interval notation of the form [from, to) will be used to indicate that
- * the endpoint from is inclusive and to is exclusive.
- */
-struct file_region {
-	struct list_head link;
-	long from;
-	long to;
-};
-
 /* Helper that removes a struct file_region from the resv_map cache and returns
  * it for use.
  */
@@ -266,14 +241,51 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to)
 	return nrg;
 }
 
+static void copy_hugetlb_cgroup_uncharge_info(struct file_region *nrg,
+					      struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	nrg->reservation_counter = rg->reservation_counter;
+	nrg->css = rg->css;
+	if (rg->css)
+		css_get(rg->css);
+#endif
+}
+
+/* Helper that records hugetlb_cgroup uncharge info. */
+static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
+						struct hstate *h,
+						struct resv_map *resv,
+						struct file_region *nrg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	if (h_cg) {
+		nrg->reservation_counter =
+			&h_cg->rsvd_hugepage[hstate_index(h)];
+		nrg->css = &h_cg->css;
+		if (!resv->pages_per_hpage)
+			resv->pages_per_hpage = pages_per_huge_page(h);
+		/* pages_per_hpage should be the same for all entries in
+		 * a resv_map.
+		 */
+		VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
+	} else {
+		nrg->reservation_counter = NULL;
+		nrg->css = NULL;
+	}
+#endif
+}
+
 /* Must be called with resv->lock held. Calling this with count_only == true
  * will count the number of pages to be added but will not modify the linked
  * list. If regions_needed != NULL and count_only == true, then regions_needed
  * will indicate the number of file_regions needed in the cache to carry out to
  * add the regions for this range.
  */
 static long add_reservation_in_range(struct resv_map *resv, long f, long t,
-				     long *regions_needed, bool count_only)
+				     struct hugetlb_cgroup *h_cg,
+				     struct hstate *h, long *regions_needed,
+				     bool count_only)
 {
 	long add = 0;
 	struct list_head *head = &resv->regions;
@@ -312,6 +324,8 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		if (!count_only) {
 			nrg = get_file_region_entry_from_cache(
 				resv, last_accounted_offset, rg->from);
+			record_hugetlb_cgroup_uncharge_info(h_cg, h,
+							    resv, nrg);
 			list_add(&nrg->link, rg->link.prev);
 		} else if (regions_needed)
 			*regions_needed += 1;
@@ -328,6 +342,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	if (!count_only) {
 		nrg = get_file_region_entry_from_cache(
 			resv, last_accounted_offset, t);
+		record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
 		list_add(&nrg->link, rg->link.prev);
 	} else if (regions_needed)
 		*regions_needed += 1;
@@ -416,15 +431,17 @@ static int allocate_file_region_entries(struct resv_map *resv,
  * 1 page will only require at most 1 entry.
  */
 static long region_add(struct resv_map *resv, long f, long t,
-		       long in_regions_needed)
+		       long in_regions_needed, struct hstate *h,
+		       struct hugetlb_cgroup *h_cg)
 {
 	long add = 0, actual_regions_needed = 0;
 
 	spin_lock(&resv->lock);
 retry:
 
 	/* Count how many regions are actually needed to execute this add. */
-	add_reservation_in_range(resv, f, t, &actual_regions_needed, true);
+	add_reservation_in_range(resv, f, t, NULL, NULL, &actual_regions_needed,
+				 true);
 
 	/*
 	 * Check for sufficient descriptors in the cache to accommodate
@@ -452,7 +469,7 @@ static long region_add(struct resv_map *resv, long f, long t,
 		goto retry;
 	}
 
-	add = add_reservation_in_range(resv, f, t, NULL, false);
+	add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false);
 
 	resv->adds_in_progress -= in_regions_needed;
 
@@ -489,7 +506,8 @@ static long region_chg(struct resv_map *resv, long f, long t,
 	spin_lock(&resv->lock);
 
 	/* Count how many hugepages in this range are NOT respresented. */
-	chg = add_reservation_in_range(resv, f, t, out_regions_needed, true);
+	chg = add_reservation_in_range(resv, f, t, NULL, NULL,
+				       out_regions_needed, true);
 
 	if (*out_regions_needed == 0)
 		*out_regions_needed = 1;
@@ -589,18 +607,26 @@ static long region_del(struct resv_map *resv, long f, long t)
 			/* New entry for end of split region */
 			nrg->from = t;
 			nrg->to = rg->to;
+
+			copy_hugetlb_cgroup_uncharge_info(nrg, rg);
+
 			INIT_LIST_HEAD(&nrg->link);
 
 			/* Original entry is trimmed */
 			rg->to = f;
 
+			hugetlb_cgroup_uncharge_file_region(
+				resv, rg, nrg->to - nrg->from);
+
 			list_add(&nrg->link, &rg->link);
 			nrg = NULL;
 			break;
 		}
 
 		if (f <= rg->from && t >= rg->to) { /* Remove entire region */
			del += rg->to - rg->from;
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - rg->from);
 			list_del(&rg->link);
 			kfree(rg);
 			continue;
@@ -609,9 +635,15 @@ static long region_del(struct resv_map *resv, long f, long t)
 		if (f <= rg->from) {	/* Trim beginning of region */
 			del += t - rg->from;
 			rg->from = t;
+
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    t - rg->from);
 		} else {		/* Trim end of region */
 			del += rg->to - f;
 			rg->to = f;
+
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - f);
 		}
 	}
 
@@ -2124,7 +2156,7 @@ static long __vma_reservation_common(struct hstate *h,
 		VM_BUG_ON(dummy_out_regions_needed != 1);
 		break;
 	case VMA_COMMIT_RESV:
-		ret = region_add(resv, idx, idx + 1, 1);
+		ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 		/* region_add calls of range 1 should never fail. */
 		VM_BUG_ON(ret < 0);
 		break;
@@ -2134,7 +2166,7 @@ static long __vma_reservation_common(struct hstate *h,
 		break;
 	case VMA_ADD_RESV:
 		if (vma->vm_flags & VM_MAYSHARE) {
-			ret = region_add(resv, idx, idx + 1, 1);
+			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 			/* region_add calls of range 1 should never fail. */
 			VM_BUG_ON(ret < 0);
 		} else {
@@ -4830,7 +4862,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	struct hstate *h = hstate_inode(inode);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
-	struct hugetlb_cgroup *h_cg;
+	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;
 
 	/* This should never happen */
@@ -4871,19 +4903,6 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 		chg = to - from;
 
-		if (hugetlb_cgroup_charge_cgroup_rsvd(
-			    hstate_index(h), chg * pages_per_huge_page(h),
-			    &h_cg)) {
-			kref_put(&resv_map->refs, resv_map_release);
-			return -ENOMEM;
-		}
-
-		/*
-		 * Since this branch handles private mappings, we attach the
-		 * counter to uncharge for this reservation off resv_map.
-		 */
-		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
-
 		set_vma_resv_map(vma, resv_map);
 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
@@ -4893,6 +4912,21 @@ int hugetlb_reserve_pages(struct inode *inode,
 			goto out_err;
 	}
 
+	ret = hugetlb_cgroup_charge_cgroup_rsvd(
+		hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+
+	if (ret < 0) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+		/* For private mappings, the hugetlb_cgroup uncharge info hangs
+		 * of the resv_map.
+		 */
+		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
+	}
+
 	/*
 	 * There must be enough pages in the subpool for the mapping. If
 	 * the subpool has a minimum size, there may be some global
@@ -4901,7 +4935,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	gbl_reserve = hugepage_subpool_get_pages(spool, chg);
 	if (gbl_reserve < 0) {
 		ret = -ENOSPC;
-		goto out_err;
+		goto out_uncharge_cgroup;
 	}
 
 	/*
@@ -4910,9 +4944,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 */
 	ret = hugetlb_acct_memory(h, gbl_reserve);
 	if (ret < 0) {
-		/* put back original number of pages, chg */
-		(void)hugepage_subpool_put_pages(spool, chg);
-		goto out_err;
+		goto out_put_pages;
 	}
 
 	/*
@@ -4927,13 +4959,11 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * else has to be done for private mappings here
 	 */
 	if (!vma || vma->vm_flags & VM_MAYSHARE) {
-		add = region_add(resv_map, from, to, regions_needed);
+		add = region_add(resv_map, from, to, regions_needed, h, h_cg);
 
 		if (unlikely(add < 0)) {
 			hugetlb_acct_memory(h, -gbl_reserve);
-			/* put back original number of pages, chg */
-			(void)hugepage_subpool_put_pages(spool, chg);
-			goto out_err;
+			goto out_put_pages;
 		} else if (unlikely(chg > add)) {
 			/*
 			 * pages in this range were added to the reserve
@@ -4944,12 +4974,22 @@ int hugetlb_reserve_pages(struct inode *inode,
 			 */
 			long rsv_adjust;
 
+			hugetlb_cgroup_uncharge_cgroup_rsvd(
+				hstate_index(h),
+				(chg - add) * pages_per_huge_page(h), h_cg);
+
 			rsv_adjust = hugepage_subpool_put_pages(spool,
 								chg - add);
 			hugetlb_acct_memory(h, -rsv_adjust);
 		}
 	}
 	return 0;
+out_put_pages:
+	/* put back original number of pages, chg */
+	(void)hugepage_subpool_put_pages(spool, chg);
+out_uncharge_cgroup:
+	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
+					    chg * pages_per_huge_page(h), h_cg);
 out_err:
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		/* Only call region_abort if the region_chg succeeded but the