Enabling a few more torchbench models with AOT Autograd #127

Merged 1 commit on Apr 7, 2022
19 changes: 13 additions & 6 deletions torchbench.py
@@ -71,19 +71,16 @@
"maml",
# Known issues with training
"demucs", # https://github.com/pytorch/benchmark/pull/639
"densenet121", # https://github.com/pytorch/benchmark/issues/652
"hf_Albert", # https://github.com/pytorch/benchmark/issues/652
"hf_Reformer", # Can only be used in the training phase
# AOT Autograd known issues
"dlrm", # No sparse support
"resnet50_quantized_qat", # Con2DBnRelu
# Known TorchDynamo bug
"hf_GPT2", # Hard to debug stashed tensor issue
"tacotron2", # Model uses Variable
}

# Some models have bad train dataset. We read eval dataset.
ONLY_EVAL_DATASET = {"yolov3"}
# yolov3 - seems to have different number of inputs between eval and train
# densenet121 - OOM for train, using eval for now.
ONLY_EVAL_DATASET = {"yolov3", "densenet121"}

# These models support only train mode. So accuracy checking can't be done in
# eval mode.
@@ -93,6 +90,8 @@
REQUIRE_HIGHER_TOLERANCE = {
    "alexnet",
    "attention_is_all_you_need_pytorch",
    "densenet121",
    "hf_Albert",
    "vgg16",
    "mobilenet_v3_large",
}
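densenet121 and hf_Albert leave the skip list above and instead land here, so their training results are checked against a looser tolerance. The sketch below shows how such a set is typically consumed; the helper name and the concrete threshold values are illustrative assumptions, not values taken from torchbench.py.

def pick_tolerance(model_name, require_higher_tolerance, default=1e-4, loose=1e-3):
    # Models whose training results drift slightly more than the default
    # threshold still pass accuracy checking, just with a looser bound.
    return loose if model_name in require_higher_tolerance else default

tol = pick_tolerance("densenet121", {"alexnet", "densenet121", "hf_Albert", "vgg16"})
print(tol)  # 0.001
# e.g. same(correct_result, new_result, tol=tol)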
@@ -574,6 +573,11 @@ def main():
        action="store_true",
        help="Generates AOT Autograd stats like how many graphs are sent to AOT",
    )
    parser.add_argument(
        "--disable-functionalization",
        action="store_true",
        help="Disables functionalization",
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--coverage", action="store_true", help="(default) " + help(coverage_experiment)
@@ -856,6 +860,9 @@ def main():
    if output_filename:
        output_filename = os.path.join(torchdynamo.config.base_dir, output_filename)

    if args.disable_functionalization:
        torchdynamo.config.normalize_ir = False

    if args.minimum_call_count:
        torchdynamo.config.minimum_call_count = args.minimum_call_count
    if args.only:
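Taken together, the two hunks above add a --disable-functionalization switch and wire it to torchdynamo.config.normalize_ir. Below is a minimal standalone sketch of that wiring, assuming torchdynamo is importable; only the flag and config names are taken from the diff.

import argparse

import torchdynamo

parser = argparse.ArgumentParser()
parser.add_argument(
    "--disable-functionalization",
    action="store_true",
    help="Disables functionalization",
)
args = parser.parse_args(["--disable-functionalization"])

if args.disable_functionalization:
    # With normalize_ir off, the mutation-removal (functionalization) pass is
    # skipped and AOT Autograd sees the captured graph as-is.
    torchdynamo.config.normalize_ir = False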
16 changes: 9 additions & 7 deletions torchdynamo/optimizations/training.py
@@ -2,6 +2,7 @@

import torch

from torchdynamo import config
from torchdynamo.utils import clone_inputs
from torchdynamo.utils import count_calls
from torchdynamo.utils import counters
@@ -27,13 +28,14 @@ def __init__(self, gm: torch.fx.GraphModule, example_inputs):
        counters["aot_autograd"]["total"] += 1
        self.use_fallback = False
        self.original_example_inputs = example_inputs
        try:
            self.gm = normalize_ir(gm, self.example_inputs)
        except Exception:
            log.debug("TorchDynamo unable to remove mutation")
            self.gm = gm
            self.use_fallback = True
            pass
        self.gm = gm
        if config.normalize_ir:
            try:
                self.gm = normalize_ir(gm, self.example_inputs)
            except Exception:
                log.debug("TorchDynamo unable to remove mutation")
                self.use_fallback = True
                pass

        gm_inputs = list(filter(lambda x: x.op == "placeholder", gm.graph.nodes))

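The constructor now follows a best-effort pattern: normalize_ir runs only when config.normalize_ir is set, and a failure keeps the original graph and marks the module for fallback instead of raising. A self-contained sketch of that pattern with hypothetical helper names (not TorchDynamo code):

def try_normalize(gm, example_inputs, normalize_ir, enabled):
    """Return (graph to use, use_fallback)."""
    if not enabled:
        return gm, False
    try:
        return normalize_ir(gm, example_inputs), False
    except Exception:
        # Keep the original graph; the caller can fall back to eager execution.
        return gm, True

def broken_pass(gm, inputs):
    raise RuntimeError("unable to remove mutation")

gm, use_fallback = try_normalize("original-graph", [], broken_pass, enabled=True)
print(gm, use_fallback)  # original-graph True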
4 changes: 4 additions & 0 deletions torchdynamo/testing.py
@@ -84,6 +84,10 @@ def same(a, b, cos_similarity=False, tol=1e-4):
                return False
        return True
    elif isinstance(a, torch.Tensor):
        if a.is_sparse:
            assert b.is_sparse
            a = a.to_dense()
            b = b.to_dense()
        assert isinstance(b, torch.Tensor)
        if cos_similarity:
            # TRT will bring error loss larger than current threshold. Use cosine similarity as replacement
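The new branch lets same() compare sparse tensors by densifying both sides first, which is likely relevant to models such as dlrm that the skip list flags for missing sparse support. A self-contained sketch of the idea; same_dense is a stand-in for illustration, not the real helper.

import torch

def same_dense(a, b, tol=1e-4):
    # Convert sparse COO tensors to dense before comparing, mirroring the
    # branch added to same() above.
    if a.is_sparse:
        assert b.is_sparse
        a = a.to_dense()
        b = b.to_dense()
    return torch.allclose(a, b, rtol=tol, atol=tol)

indices = torch.tensor([[0, 1], [1, 0]])
values = torch.tensor([3.0, 4.0])
s = torch.sparse_coo_tensor(indices, values, (2, 2))
print(same_dense(s, s.clone()))  # True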