@@ -5,7 +5,6 @@
 import os
 import tempfile
 import unittest
-
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -15,6 +14,7 @@
     BackwardPrefetch,
 )
 from torch.distributed.fsdp.wrap import (
+    always_wrap_policy,
     default_auto_wrap_policy,
     enable_wrap,
     wrap,
@@ -67,6 +67,15 @@ def get_model(cuda=True):
                 sequential = sequential.cuda()
             return sequential
 
+        @staticmethod
+        def verify_model_all_wrapped(cls, model):
+            cls.assertTrue(isinstance(model, FSDP))
+            cls.assertTrue(isinstance(model.module[0], FSDP))
+            cls.assertTrue(isinstance(model.module[1], FSDP))
+            cls.assertTrue(isinstance(model.module[2], FSDP))
+            cls.assertTrue(isinstance(model.module[2].module[0], FSDP))
+            cls.assertTrue(isinstance(model.module[2].module[1], FSDP))
+
         @staticmethod
         def verify_model(cls, model):
             cls.assertTrue(isinstance(model, FSDP))
@@ -257,6 +266,16 @@ def test_wrap_override_defaults(self):
         self.assertEqual(layer.rank, 0)
         self.assertEqual(layer.world_size, 2)
 
+    @unittest.skipIf(not torch.cuda.is_available(), "Test Requires CUDA")
+    def test_always_wrap(self):
+        """
+        Test to ensure that if `always_wrap_policy` is
+        passed into FSDP, all submodules are wrapped.
+        """
+        seq = TestFSDPWrap.NestedSequentialModel.get_model(cuda=True)
+        model = FSDP(seq, process_group=self.process_group, fsdp_auto_wrap_policy=always_wrap_policy)
+        TestFSDPWrap.NestedSequentialModel.verify_model_all_wrapped(self, model)
+
     def test_auto_wrap_api(self):
         """
         Test to ensure with auto wrap, we wrap child modules correctly based on the min_num_params.
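For context, the snippet below is a minimal standalone sketch of what the new test exercises: wrapping a nested Sequential with FSDP under `always_wrap_policy` so every submodule becomes its own FSDP unit. The single-rank process group setup, the hard-coded MASTER_ADDR/MASTER_PORT, and the layer shapes are illustrative assumptions rather than part of this PR; the `fsdp_auto_wrap_policy` keyword follows this diff (later PyTorch releases spell it `auto_wrap_policy`).

# Standalone sketch of the behavior test_always_wrap checks. Assumptions (not
# from this PR): one CUDA device, a single-rank "nccl" process group, and
# illustrative Linear(5, 5) layers in place of NestedSequentialModel.get_model.
import os

import torch.distributed as dist
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp.wrap import always_wrap_policy

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("nccl", rank=0, world_size=1)

# Nested shape mirroring the test model: two leaf layers plus an inner
# Sequential holding two more.
seq = nn.Sequential(
    nn.Linear(5, 5),
    nn.Linear(5, 5),
    nn.Sequential(nn.Linear(5, 5), nn.Linear(5, 5)),
).cuda()

# always_wrap_policy returns True for every module the auto-wrapper visits, so
# each child (including the inner Sequential's children) becomes its own FSDP
# unit, which is what verify_model_all_wrapped asserts above.
model = FSDP(seq, fsdp_auto_wrap_policy=always_wrap_policy)
assert isinstance(model.module[2].module[0], FSDP)

dist.destroy_process_group()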