@@ -1368,7 +1368,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
  * that is not free, and 1 for any other page type.
  * For 1 the page is returned with increased page count, otherwise not.
  */
-static int get_any_page(struct page *p, unsigned long pfn, int flags)
+static int __get_any_page(struct page *p, unsigned long pfn, int flags)
 {
	int ret;
 
@@ -1393,11 +1393,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	if (!get_page_unless_zero(compound_head(p))) {
 		if (PageHuge(p)) {
 			pr_info("%s: %#lx free huge page\n", __func__, pfn);
-			ret = dequeue_hwpoisoned_huge_page(compound_head(p));
+			ret = 0;
 		} else if (is_free_buddy_page(p)) {
 			pr_info("%s: %#lx free buddy page\n", __func__, pfn);
-			/* Set hwpoison bit while page is still isolated */
-			SetPageHWPoison(p);
 			ret = 0;
 		} else {
 			pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
@@ -1413,23 +1411,48 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 	return ret;
 }
 
+static int get_any_page(struct page *page, unsigned long pfn, int flags)
+{
+	int ret = __get_any_page(page, pfn, flags);
+
+	if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
+		/*
+		 * Try to free it.
+		 */
+		put_page(page);
+		shake_page(page, 1);
+
+		/*
+		 * Did it turn free?
+		 */
+		ret = __get_any_page(page, pfn, 0);
+		if (!PageLRU(page)) {
+			pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
+				pfn, page->flags);
+			return -EIO;
+		}
+	}
+	return ret;
+}
+
 static int soft_offline_huge_page(struct page *page, int flags)
 {
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
 	struct page *hpage = compound_head(page);
 
+	/*
+	 * This double-check of PageHWPoison is to avoid the race with
+	 * memory_failure(). See also comment in __soft_offline_page().
+	 */
+	lock_page(hpage);
 	if (PageHWPoison(hpage)) {
+		unlock_page(hpage);
+		put_page(hpage);
 		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
-		ret = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
-
-	ret = get_any_page(page, pfn, flags);
-	if (ret < 0)
-		goto out;
-	if (ret == 0)
-		goto done;
+	unlock_page(hpage);
 
 	/* Keep page count to indicate a given hugepage is isolated. */
 	ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
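
The lock_page()/re-check added to soft_offline_huge_page() closes the window in which memory_failure() can poison the hugepage after the unlocked PageHWPoison test at the top of soft_offline_page(). A sketch of the interleaving being guarded against (illustrative comment only, not part of the patch; per the patch's own comment, memory_failure() sets the flag outside the page lock but re-checks it inside):

	/*
	 * soft offline path                 memory_failure() path
	 * -----------------                 ---------------------
	 * PageHWPoison(p) == false          (unlocked entry check passes)
	 *                                   SetPageHWPoison(p)  (outside lock)
	 * lock_page(p)
	 * PageHWPoison(p) == true
	 *   -> unlock, put_page, -EBUSY
	 *
	 * Since memory_failure() in turn re-checks PageHWPoison under the
	 * page lock, the two paths cannot both claim the same page.
	 */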
@@ -1438,17 +1461,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	if (ret) {
 		pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
-		goto out;
+	} else {
+		set_page_hwpoison_huge_page(hpage);
+		dequeue_hwpoisoned_huge_page(hpage);
+		atomic_long_add(1 << compound_trans_order(hpage),
+				&num_poisoned_pages);
 	}
-done:
 	/* keep elevated page count for bad page */
-	atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages);
-	set_page_hwpoison_huge_page(hpage);
-	dequeue_hwpoisoned_huge_page(hpage);
-out:
 	return ret;
 }
 
+static int __soft_offline_page(struct page *page, int flags);
+
 /**
  * soft_offline_page - Soft offline a page.
  * @page: page to offline
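
Two effects of this hunk: marking and accounting (set_page_hwpoison_huge_page(), dequeue_hwpoisoned_huge_page(), the num_poisoned_pages update) now run only on the success path of migrate_huge_page(), so a failed migration no longer poisons the hugepage; and the forward declaration of __soft_offline_page() lets the reworked soft_offline_page() below dispatch to it ahead of its definition.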
@@ -1477,62 +1501,60 @@ int soft_offline_page(struct page *page, int flags)
 	unsigned long pfn = page_to_pfn(page);
 	struct page *hpage = compound_trans_head(page);
 
-	if (PageHuge(page)) {
-		ret = soft_offline_huge_page(page, flags);
-		goto out;
+	if (PageHWPoison(page)) {
+		pr_info("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
 	}
-	if (PageTransHuge(hpage)) {
+	if (!PageHuge(page) && PageTransHuge(hpage)) {
 		if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
 			pr_info("soft offline: %#lx: failed to split THP\n",
 				pfn);
-			ret = -EBUSY;
-			goto out;
+			return -EBUSY;
 		}
 	}
 
-	if (PageHWPoison(page)) {
-		pr_info("soft offline: %#lx page already poisoned\n", pfn);
-		ret = -EBUSY;
-		goto out;
-	}
-
 	ret = get_any_page(page, pfn, flags);
 	if (ret < 0)
-		goto out;
-	if (ret == 0)
-		goto done;
-
-	/*
-	 * Page cache page we can handle?
-	 */
-	if (!PageLRU(page)) {
-		/*
-		 * Try to free it.
-		 */
-		put_page(page);
-		shake_page(page, 1);
-
-		/*
-		 * Did it turn free?
-		 */
-		ret = get_any_page(page, pfn, 0);
-		if (ret < 0)
-			goto out;
-		if (ret == 0)
-			goto done;
-	}
-	if (!PageLRU(page)) {
-		pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
-			pfn, page->flags);
-		ret = -EIO;
-		goto out;
+		return ret;
+	if (ret) { /* for in-use pages */
+		if (PageHuge(page))
+			ret = soft_offline_huge_page(page, flags);
+		else
+			ret = __soft_offline_page(page, flags);
+	} else { /* for free pages */
+		if (PageHuge(page)) {
+			set_page_hwpoison_huge_page(hpage);
+			dequeue_hwpoisoned_huge_page(hpage);
+			atomic_long_add(1 << compound_trans_order(hpage),
+					&num_poisoned_pages);
+		} else {
+			SetPageHWPoison(page);
+			atomic_long_inc(&num_poisoned_pages);
+		}
 	}
+	/* keep elevated page count for bad page */
+	return ret;
+}
+
+static int __soft_offline_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
 
 	/*
-	 * Synchronized using the page lock with memory_failure()
+	 * Check PageHWPoison again inside page lock because PageHWPoison
+	 * is set by memory_failure() outside page lock. Note that
+	 * memory_failure() also double-checks PageHWPoison inside page lock,
+	 * so there's no race between soft_offline_page() and memory_failure().
 	 */
 	lock_page(page);
 	wait_on_page_writeback(page);
+	if (PageHWPoison(page)) {
+		unlock_page(page);
+		put_page(page);
+		pr_info("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
+	}
 	/*
 	 * Try to invalidate first. This should work for
 	 * non dirty unmapped page cache pages.
@@ -1545,9 +1567,10 @@ int soft_offline_page(struct page *page, int flags)
 	 */
 	if (ret == 1) {
 		put_page(page);
-		ret = 0;
 		pr_info("soft_offline: %#lx: invalidated\n", pfn);
-		goto done;
+		SetPageHWPoison(page);
+		atomic_long_inc(&num_poisoned_pages);
+		return 0;
 	}
 
 	/*
@@ -1575,18 +1598,13 @@ int soft_offline_page(struct page *page, int flags)
 				pfn, ret, page->flags);
 			if (ret > 0)
 				ret = -EIO;
+		} else {
+			SetPageHWPoison(page);
+			atomic_long_inc(&num_poisoned_pages);
 		}
 	} else {
 		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
 			pfn, ret, page_count(page), page->flags);
 	}
-	if (ret)
-		goto out;
-
-done:
-	/* keep elevated page count for bad page */
-	atomic_long_inc(&num_poisoned_pages);
-	SetPageHWPoison(page);
-out:
 	return ret;
 }
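
Taken together, the patch reduces soft_offline_page() to a dispatcher over the result of get_any_page(). A condensed sketch of the resulting flow, assembled from the hunks above (mark_poisoned() is a hypothetical stand-in for the poison-and-account branches, not a kernel function):

	int soft_offline_page(struct page *page, int flags)
	{
		unsigned long pfn = page_to_pfn(page);
		int ret;

		if (PageHWPoison(page))		/* unlocked first check */
			return -EBUSY;
		/* ... THP split elided ... */
		ret = get_any_page(page, pfn, flags);	/* take ref, classify */
		if (ret < 0)
			return ret;		/* unknown page type */
		if (ret)			/* in use: migrate it away */
			ret = PageHuge(page) ?
				soft_offline_huge_page(page, flags) :
				__soft_offline_page(page, flags);
		else				/* already free: just poison */
			mark_poisoned(page);	/* hypothetical helper */
		return ret;
	}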