Skip to content

Commit af8fae7

Browse files
Naoya Horiguchitorvalds
Naoya Horiguchi
authored andcommitted
mm/memory-failure.c: clean up soft_offline_page()
Currently soft_offline_page() is hard to maintain because it has many return points and goto statements. All of this mess come from get_any_page(). This function should only get page refcount as the name implies, but it does some page isolating actions like SetPageHWPoison() and dequeuing hugepage. This patch corrects it and introduces some internal subroutines to make soft offlining code more readable and maintainable. Signed-off-by: Naoya Horiguchi <[email protected]> Reviewed-by: Andi Kleen <[email protected]> Cc: Tony Luck <[email protected]> Cc: Wu Fengguang <[email protected]> Cc: Xishi Qiu <[email protected]> Cc: Jiang Liu <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 293c07e commit af8fae7

File tree

1 file changed

+87
-69
lines changed

1 file changed

+87
-69
lines changed

mm/memory-failure.c

Lines changed: 87 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,7 +1368,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
13681368
* that is not free, and 1 for any other page type.
13691369
* For 1 the page is returned with increased page count, otherwise not.
13701370
*/
1371-
static int get_any_page(struct page *p, unsigned long pfn, int flags)
1371+
static int __get_any_page(struct page *p, unsigned long pfn, int flags)
13721372
{
13731373
int ret;
13741374

@@ -1393,11 +1393,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
13931393
if (!get_page_unless_zero(compound_head(p))) {
13941394
if (PageHuge(p)) {
13951395
pr_info("%s: %#lx free huge page\n", __func__, pfn);
1396-
ret = dequeue_hwpoisoned_huge_page(compound_head(p));
1396+
ret = 0;
13971397
} else if (is_free_buddy_page(p)) {
13981398
pr_info("%s: %#lx free buddy page\n", __func__, pfn);
1399-
/* Set hwpoison bit while page is still isolated */
1400-
SetPageHWPoison(p);
14011399
ret = 0;
14021400
} else {
14031401
pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
@@ -1413,23 +1411,48 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
14131411
return ret;
14141412
}
14151413

1414+
static int get_any_page(struct page *page, unsigned long pfn, int flags)
1415+
{
1416+
int ret = __get_any_page(page, pfn, flags);
1417+
1418+
if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
1419+
/*
1420+
* Try to free it.
1421+
*/
1422+
put_page(page);
1423+
shake_page(page, 1);
1424+
1425+
/*
1426+
* Did it turn free?
1427+
*/
1428+
ret = __get_any_page(page, pfn, 0);
1429+
if (!PageLRU(page)) {
1430+
pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1431+
pfn, page->flags);
1432+
return -EIO;
1433+
}
1434+
}
1435+
return ret;
1436+
}
1437+
14161438
static int soft_offline_huge_page(struct page *page, int flags)
14171439
{
14181440
int ret;
14191441
unsigned long pfn = page_to_pfn(page);
14201442
struct page *hpage = compound_head(page);
14211443

1444+
/*
1445+
* This double-check of PageHWPoison is to avoid the race with
1446+
* memory_failure(). See also comment in __soft_offline_page().
1447+
*/
1448+
lock_page(hpage);
14221449
if (PageHWPoison(hpage)) {
1450+
unlock_page(hpage);
1451+
put_page(hpage);
14231452
pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
1424-
ret = -EBUSY;
1425-
goto out;
1453+
return -EBUSY;
14261454
}
1427-
1428-
ret = get_any_page(page, pfn, flags);
1429-
if (ret < 0)
1430-
goto out;
1431-
if (ret == 0)
1432-
goto done;
1455+
unlock_page(hpage);
14331456

14341457
/* Keep page count to indicate a given hugepage is isolated. */
14351458
ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
@@ -1438,17 +1461,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
14381461
if (ret) {
14391462
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
14401463
pfn, ret, page->flags);
1441-
goto out;
1464+
} else {
1465+
set_page_hwpoison_huge_page(hpage);
1466+
dequeue_hwpoisoned_huge_page(hpage);
1467+
atomic_long_add(1 << compound_trans_order(hpage),
1468+
&num_poisoned_pages);
14421469
}
1443-
done:
14441470
/* keep elevated page count for bad page */
1445-
atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages);
1446-
set_page_hwpoison_huge_page(hpage);
1447-
dequeue_hwpoisoned_huge_page(hpage);
1448-
out:
14491471
return ret;
14501472
}
14511473

1474+
static int __soft_offline_page(struct page *page, int flags);
1475+
14521476
/**
14531477
* soft_offline_page - Soft offline a page.
14541478
* @page: page to offline
@@ -1477,62 +1501,60 @@ int soft_offline_page(struct page *page, int flags)
14771501
unsigned long pfn = page_to_pfn(page);
14781502
struct page *hpage = compound_trans_head(page);
14791503

1480-
if (PageHuge(page)) {
1481-
ret = soft_offline_huge_page(page, flags);
1482-
goto out;
1504+
if (PageHWPoison(page)) {
1505+
pr_info("soft offline: %#lx page already poisoned\n", pfn);
1506+
return -EBUSY;
14831507
}
1484-
if (PageTransHuge(hpage)) {
1508+
if (!PageHuge(page) && PageTransHuge(hpage)) {
14851509
if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
14861510
pr_info("soft offline: %#lx: failed to split THP\n",
14871511
pfn);
1488-
ret = -EBUSY;
1489-
goto out;
1512+
return -EBUSY;
14901513
}
14911514
}
14921515

1493-
if (PageHWPoison(page)) {
1494-
pr_info("soft offline: %#lx page already poisoned\n", pfn);
1495-
ret = -EBUSY;
1496-
goto out;
1497-
}
1498-
14991516
ret = get_any_page(page, pfn, flags);
15001517
if (ret < 0)
1501-
goto out;
1502-
if (ret == 0)
1503-
goto done;
1504-
1505-
/*
1506-
* Page cache page we can handle?
1507-
*/
1508-
if (!PageLRU(page)) {
1509-
/*
1510-
* Try to free it.
1511-
*/
1512-
put_page(page);
1513-
shake_page(page, 1);
1514-
1515-
/*
1516-
* Did it turn free?
1517-
*/
1518-
ret = get_any_page(page, pfn, 0);
1519-
if (ret < 0)
1520-
goto out;
1521-
if (ret == 0)
1522-
goto done;
1523-
}
1524-
if (!PageLRU(page)) {
1525-
pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
1526-
pfn, page->flags);
1527-
ret = -EIO;
1528-
goto out;
1518+
return ret;
1519+
if (ret) { /* for in-use pages */
1520+
if (PageHuge(page))
1521+
ret = soft_offline_huge_page(page, flags);
1522+
else
1523+
ret = __soft_offline_page(page, flags);
1524+
} else { /* for free pages */
1525+
if (PageHuge(page)) {
1526+
set_page_hwpoison_huge_page(hpage);
1527+
dequeue_hwpoisoned_huge_page(hpage);
1528+
atomic_long_add(1 << compound_trans_order(hpage),
1529+
&num_poisoned_pages);
1530+
} else {
1531+
SetPageHWPoison(page);
1532+
atomic_long_inc(&num_poisoned_pages);
1533+
}
15291534
}
1535+
/* keep elevated page count for bad page */
1536+
return ret;
1537+
}
1538+
1539+
static int __soft_offline_page(struct page *page, int flags)
1540+
{
1541+
int ret;
1542+
unsigned long pfn = page_to_pfn(page);
15301543

15311544
/*
1532-
* Synchronized using the page lock with memory_failure()
1545+
* Check PageHWPoison again inside page lock because PageHWPoison
1546+
* is set by memory_failure() outside page lock. Note that
1547+
* memory_failure() also double-checks PageHWPoison inside page lock,
1548+
* so there's no race between soft_offline_page() and memory_failure().
15331549
*/
15341550
lock_page(page);
15351551
wait_on_page_writeback(page);
1552+
if (PageHWPoison(page)) {
1553+
unlock_page(page);
1554+
put_page(page);
1555+
pr_info("soft offline: %#lx page already poisoned\n", pfn);
1556+
return -EBUSY;
1557+
}
15361558
/*
15371559
* Try to invalidate first. This should work for
15381560
* non dirty unmapped page cache pages.
@@ -1545,9 +1567,10 @@ int soft_offline_page(struct page *page, int flags)
15451567
*/
15461568
if (ret == 1) {
15471569
put_page(page);
1548-
ret = 0;
15491570
pr_info("soft_offline: %#lx: invalidated\n", pfn);
1550-
goto done;
1571+
SetPageHWPoison(page);
1572+
atomic_long_inc(&num_poisoned_pages);
1573+
return 0;
15511574
}
15521575

15531576
/*
@@ -1575,18 +1598,13 @@ int soft_offline_page(struct page *page, int flags)
15751598
pfn, ret, page->flags);
15761599
if (ret > 0)
15771600
ret = -EIO;
1601+
} else {
1602+
SetPageHWPoison(page);
1603+
atomic_long_inc(&num_poisoned_pages);
15781604
}
15791605
} else {
15801606
pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
15811607
pfn, ret, page_count(page), page->flags);
15821608
}
1583-
if (ret)
1584-
goto out;
1585-
1586-
done:
1587-
/* keep elevated page count for bad page */
1588-
atomic_long_inc(&num_poisoned_pages);
1589-
SetPageHWPoison(page);
1590-
out:
15911609
return ret;
15921610
}

0 commit comments

Comments
 (0)