Skip to content

Commit 3fea5a4

Browse files
hnaz authored and torvalds committed
mm: memcontrol: convert page cache to a new mem_cgroup_charge() API
The try/commit/cancel protocol that memcg uses dates back to when pages used to be uncharged upon removal from the page cache, and thus couldn't be committed before the insertion had succeeded. Nowadays, pages are uncharged when they are physically freed; it doesn't matter whether the insertion was successful or not. For the page cache, the transaction dance has become unnecessary. Introduce a mem_cgroup_charge() function that simply charges a newly allocated page to a cgroup and sets up page->mem_cgroup in one single step. If the insertion fails, the caller doesn't have to do anything but free/put the page. Then switch the page cache over to this new API. Subsequent patches will also convert anon pages, but it needs a bit more prep work. Right now, memcg depends on page->mapping being already set up at the time of charging, so that it can maintain its own MEMCG_CACHE and MEMCG_RSS counters. For anon, page->mapping is set under the same pte lock under which the page is publishd, so a single charge point that can block doesn't work there just yet. The following prep patches will replace the private memcg counters with the generic vmstat counters, thus removing the page->mapping dependency, then complete the transition to the new single-point charge API and delete the old transactional scheme. v2: leave shmem swapcache when charging fails to avoid double IO (Joonsoo) v3: rebase on preceeding shmem simplification patch Signed-off-by: Johannes Weiner <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Reviewed-by: Alex Shi <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Joonsoo Kim <[email protected]> Cc: "Kirill A. Shutemov" <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Roman Gushchin <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Balbir Singh <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
1 parent 6caa6a0 commit 3fea5a4

File tree

4 files changed

+77
-59
lines changed

4 files changed

+77
-59
lines changed

include/linux/memcontrol.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
365365
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
366366
bool lrucare);
367367
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
368+
369+
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask,
370+
bool lrucare);
371+
368372
void mem_cgroup_uncharge(struct page *page);
369373
void mem_cgroup_uncharge_list(struct list_head *page_list);
370374

@@ -872,6 +876,12 @@ static inline void mem_cgroup_cancel_charge(struct page *page,
872876
{
873877
}
874878

879+
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
880+
gfp_t gfp_mask, bool lrucare)
881+
{
882+
return 0;
883+
}
884+
875885
static inline void mem_cgroup_uncharge(struct page *page)
876886
{
877887
}

mm/filemap.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -832,25 +832,23 @@ static int __add_to_page_cache_locked(struct page *page,
832832
{
833833
XA_STATE(xas, &mapping->i_pages, offset);
834834
int huge = PageHuge(page);
835-
struct mem_cgroup *memcg;
836835
int error;
837836
void *old;
838837

839838
VM_BUG_ON_PAGE(!PageLocked(page), page);
840839
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
841840
mapping_set_update(&xas, mapping);
842841

843-
if (!huge) {
844-
error = mem_cgroup_try_charge(page, current->mm,
845-
gfp_mask, &memcg);
846-
if (error)
847-
return error;
848-
}
849-
850842
get_page(page);
851843
page->mapping = mapping;
852844
page->index = offset;
853845

846+
if (!huge) {
847+
error = mem_cgroup_charge(page, current->mm, gfp_mask, false);
848+
if (error)
849+
goto error;
850+
}
851+
854852
do {
855853
xas_lock_irq(&xas);
856854
old = xas_load(&xas);
@@ -874,20 +872,18 @@ static int __add_to_page_cache_locked(struct page *page,
874872
xas_unlock_irq(&xas);
875873
} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
876874

877-
if (xas_error(&xas))
875+
if (xas_error(&xas)) {
876+
error = xas_error(&xas);
878877
goto error;
878+
}
879879

880-
if (!huge)
881-
mem_cgroup_commit_charge(page, memcg, false);
882880
trace_mm_filemap_add_to_page_cache(page);
883881
return 0;
884882
error:
885883
page->mapping = NULL;
886884
/* Leave page->index set: truncation relies upon it */
887-
if (!huge)
888-
mem_cgroup_cancel_charge(page, memcg);
889885
put_page(page);
890-
return xas_error(&xas);
886+
return error;
891887
}
892888
ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
893889

mm/memcontrol.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6637,6 +6637,33 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
66376637
cancel_charge(memcg, nr_pages);
66386638
}
66396639

6640+
/**
6641+
* mem_cgroup_charge - charge a newly allocated page to a cgroup
6642+
* @page: page to charge
6643+
* @mm: mm context of the victim
6644+
* @gfp_mask: reclaim mode
6645+
* @lrucare: page might be on the LRU already
6646+
*
6647+
* Try to charge @page to the memcg that @mm belongs to, reclaiming
6648+
* pages according to @gfp_mask if necessary.
6649+
*
6650+
* Returns 0 on success. Otherwise, an error code is returned.
6651+
*/
6652+
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask,
6653+
bool lrucare)
6654+
{
6655+
struct mem_cgroup *memcg;
6656+
int ret;
6657+
6658+
VM_BUG_ON_PAGE(!page->mapping, page);
6659+
6660+
ret = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);
6661+
if (ret)
6662+
return ret;
6663+
mem_cgroup_commit_charge(page, memcg, lrucare);
6664+
return 0;
6665+
}
6666+
66406667
struct uncharge_gather {
66416668
struct mem_cgroup *memcg;
66426669
unsigned long pgpgout;
@@ -6684,8 +6711,6 @@ static void uncharge_batch(const struct uncharge_gather *ug)
66846711
static void uncharge_page(struct page *page, struct uncharge_gather *ug)
66856712
{
66866713
VM_BUG_ON_PAGE(PageLRU(page), page);
6687-
VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) &&
6688-
!PageHWPoison(page) , page);
66896714

66906715
if (!page->mem_cgroup)
66916716
return;

mm/shmem.c

Lines changed: 30 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -605,11 +605,13 @@ static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
605605
*/
606606
static int shmem_add_to_page_cache(struct page *page,
607607
struct address_space *mapping,
608-
pgoff_t index, void *expected, gfp_t gfp)
608+
pgoff_t index, void *expected, gfp_t gfp,
609+
struct mm_struct *charge_mm)
609610
{
610611
XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
611612
unsigned long i = 0;
612613
unsigned long nr = compound_nr(page);
614+
int error;
613615

614616
VM_BUG_ON_PAGE(PageTail(page), page);
615617
VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -621,6 +623,16 @@ static int shmem_add_to_page_cache(struct page *page,
621623
page->mapping = mapping;
622624
page->index = index;
623625

626+
error = mem_cgroup_charge(page, charge_mm, gfp, PageSwapCache(page));
627+
if (error) {
628+
if (!PageSwapCache(page) && PageTransHuge(page)) {
629+
count_vm_event(THP_FILE_FALLBACK);
630+
count_vm_event(THP_FILE_FALLBACK_CHARGE);
631+
}
632+
goto error;
633+
}
634+
cgroup_throttle_swaprate(page, gfp);
635+
624636
do {
625637
void *entry;
626638
xas_lock_irq(&xas);
@@ -648,12 +660,15 @@ static int shmem_add_to_page_cache(struct page *page,
648660
} while (xas_nomem(&xas, gfp));
649661

650662
if (xas_error(&xas)) {
651-
page->mapping = NULL;
652-
page_ref_sub(page, nr);
653-
return xas_error(&xas);
663+
error = xas_error(&xas);
664+
goto error;
654665
}
655666

656667
return 0;
668+
error:
669+
page->mapping = NULL;
670+
page_ref_sub(page, nr);
671+
return error;
657672
}
658673

659674
/*
@@ -1619,7 +1634,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
16191634
struct address_space *mapping = inode->i_mapping;
16201635
struct shmem_inode_info *info = SHMEM_I(inode);
16211636
struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
1622-
struct mem_cgroup *memcg;
16231637
struct page *page;
16241638
swp_entry_t swap;
16251639
int error;
@@ -1664,18 +1678,11 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
16641678
goto failed;
16651679
}
16661680

1667-
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg);
1668-
if (error)
1669-
goto failed;
1670-
16711681
error = shmem_add_to_page_cache(page, mapping, index,
1672-
swp_to_radix_entry(swap), gfp);
1673-
if (error) {
1674-
mem_cgroup_cancel_charge(page, memcg);
1682+
swp_to_radix_entry(swap), gfp,
1683+
charge_mm);
1684+
if (error)
16751685
goto failed;
1676-
}
1677-
1678-
mem_cgroup_commit_charge(page, memcg, true);
16791686

16801687
spin_lock_irq(&info->lock);
16811688
info->swapped--;
@@ -1722,7 +1729,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
17221729
struct shmem_inode_info *info = SHMEM_I(inode);
17231730
struct shmem_sb_info *sbinfo;
17241731
struct mm_struct *charge_mm;
1725-
struct mem_cgroup *memcg;
17261732
struct page *page;
17271733
enum sgp_type sgp_huge = sgp;
17281734
pgoff_t hindex = index;
@@ -1847,21 +1853,11 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
18471853
if (sgp == SGP_WRITE)
18481854
__SetPageReferenced(page);
18491855

1850-
error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg);
1851-
if (error) {
1852-
if (PageTransHuge(page)) {
1853-
count_vm_event(THP_FILE_FALLBACK);
1854-
count_vm_event(THP_FILE_FALLBACK_CHARGE);
1855-
}
1856-
goto unacct;
1857-
}
18581856
error = shmem_add_to_page_cache(page, mapping, hindex,
1859-
NULL, gfp & GFP_RECLAIM_MASK);
1860-
if (error) {
1861-
mem_cgroup_cancel_charge(page, memcg);
1857+
NULL, gfp & GFP_RECLAIM_MASK,
1858+
charge_mm);
1859+
if (error)
18621860
goto unacct;
1863-
}
1864-
mem_cgroup_commit_charge(page, memcg, false);
18651861
lru_cache_add_anon(page);
18661862

18671863
spin_lock_irq(&info->lock);
@@ -2299,7 +2295,6 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
22992295
struct address_space *mapping = inode->i_mapping;
23002296
gfp_t gfp = mapping_gfp_mask(mapping);
23012297
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
2302-
struct mem_cgroup *memcg;
23032298
spinlock_t *ptl;
23042299
void *page_kaddr;
23052300
struct page *page;
@@ -2349,16 +2344,10 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
23492344
if (unlikely(offset >= max_off))
23502345
goto out_release;
23512346

2352-
ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg);
2353-
if (ret)
2354-
goto out_release;
2355-
23562347
ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
2357-
gfp & GFP_RECLAIM_MASK);
2348+
gfp & GFP_RECLAIM_MASK, dst_mm);
23582349
if (ret)
2359-
goto out_release_uncharge;
2360-
2361-
mem_cgroup_commit_charge(page, memcg, false);
2350+
goto out_release;
23622351

23632352
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
23642353
if (dst_vma->vm_flags & VM_WRITE)
@@ -2379,11 +2368,11 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
23792368
ret = -EFAULT;
23802369
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
23812370
if (unlikely(offset >= max_off))
2382-
goto out_release_uncharge_unlock;
2371+
goto out_release_unlock;
23832372

23842373
ret = -EEXIST;
23852374
if (!pte_none(*dst_pte))
2386-
goto out_release_uncharge_unlock;
2375+
goto out_release_unlock;
23872376

23882377
lru_cache_add_anon(page);
23892378

@@ -2404,12 +2393,10 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
24042393
ret = 0;
24052394
out:
24062395
return ret;
2407-
out_release_uncharge_unlock:
2396+
out_release_unlock:
24082397
pte_unmap_unlock(dst_pte, ptl);
24092398
ClearPageDirty(page);
24102399
delete_from_page_cache(page);
2411-
out_release_uncharge:
2412-
mem_cgroup_cancel_charge(page, memcg);
24132400
out_release:
24142401
unlock_page(page);
24152402
put_page(page);

0 commit comments

Comments
 (0)