Commit 011b738

Update base for Update on "[WIP] Add more NVFuser microbenchmarks"
Waiting on pytorch/pytorch#73627 to land, because some of these don't pass without it. [ghstack-poisoned]
2 parents b19f72b + 032f079 commit 011b738

133 files changed (+1670 additions, −1066 deletions)


.github/workflows/correctness.yml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 name: TorchBench Correctness Testing
 on:
-  pull_request:
+  workflow_dispatch:
 env:
   PYTHON_VERSION: "3.8"
   CUDA_VERSION: "11.3"

.github/workflows/v1-bisection.yml

Lines changed: 5 additions & 1 deletion
@@ -28,9 +28,13 @@ jobs:
       run: |
         conda create -y -n "$BISECT_CONDA_ENV" python="${PYTHON_VERSION}"
         . activate "$BISECT_CONDA_ENV"
-        conda install -y numpy requests=2.22 ninja pyyaml mkl mkl-include setuptools cmake cffi \
+        # pytorch doesn't support cmake>3.22
+        # See: https://github.com/pytorch/pytorch/issues/74985
+        conda install -y numpy requests ninja pyyaml mkl mkl-include setuptools cmake=3.22 cffi \
           typing_extensions future six dataclasses tabulate gitpython tqdm
         conda install -y -c pytorch "${MAGMA_VERSION}"
+        # Pin ffmpeg version to 4.4.1. See: https://github.com/pytorch/vision/issues/5616
+        conda install -y ffmpeg=4.4.1
     - name: Bisection
       run: |
         export BISECT_ISSUE="${{ github.event.inputs.issue_name }}"

.github/workflows/v2-bisection.yml

Lines changed: 5 additions & 1 deletion
@@ -28,10 +28,14 @@ jobs:
       run: |
         conda create -y -n "${BISECT_CONDA_ENV}" python="${PYTHON_VER}"
         . activate "${BISECT_CONDA_ENV}"
-        conda install -y numpy requests ninja pyyaml mkl mkl-include setuptools cmake cffi \
+        # pytorch doesn't support cmake>3.22
+        # See: https://github.com/pytorch/pytorch/issues/74985
+        conda install -y numpy requests ninja pyyaml mkl mkl-include setuptools cmake=3.22 cffi \
           typing_extensions future six dataclasses tabulate gitpython git-lfs tqdm
         # Install magma
         conda install -y -c pytorch "${MAGMA_VERSION}"
+        # Pin ffmpeg version to 4.4.1. See: https://github.com/pytorch/vision/issues/5616
+        conda install -y ffmpeg=4.4.1
     - name: Bisection
       run: |
         export BISECT_ISSUE="${{ github.event.inputs.issue_name }}"

.gitignore

Lines changed: 0 additions & 4 deletions
@@ -14,9 +14,5 @@ build/
 .idea
 old.json
 te.json
-Video_data_train_processed.csv
-labels.json
-results.txt
-checkpoints
 logs/
 scripts/scribe.py

.gitmodules

Lines changed: 3 additions & 18 deletions
@@ -1,18 +1,3 @@
-[submodule "third_party/benchmark"]
-    path = legacy/third_party/benchmark
-    url = https://github.com/google/benchmark
-[submodule "setup/third_party/pytorch-dockerfiles"]
-    path = setup/third_party/pytorch-dockerfiles
-    url = https://github.com/pietern/pytorch-dockerfiles
-[submodule "python/third_party_benchmarks/PyTorch-benchmark"]
-    path = python/third_party_benchmarks/PyTorch-benchmark
-    url = https://github.com/MlWoo/PyTorch-benchmark
-[submodule "third_party/eigen"]
-    path = legacy/third_party/eigen
-    url = https://github.com/eigenteam/eigen-git-mirror
-[submodule "third_party/sleef"]
-    path = legacy/third_party/sleef
-    url = https://github.com/shibatch/sleef
-[submodule "third_party/tbb"]
-    path = legacy/third_party/tbb
-    url = https://github.com/01org/tbb
+[submodule "submodules/FAMBench"]
+    path = submodules/FAMBench
+    url = https://github.com/facebookresearch/FAMBench.git

README.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ except for the torch products which are intended to be installed separately so
 different torch versions can be benchmarked.

 ### Using Pre-built Packages
-We support python 3.7 and 3.8, and 3.8 is recommended. Currently, there are compatibility issues with 3.9+. Conda is optional but suggested. To switch to python 3.8 in conda:
+We support python 3.7+, and 3.8 is recommended. Conda is optional but suggested. To start with python 3.8 in conda:
 ```
 # using your current conda environment:
 conda install -y python=3.8

bisection.py

Lines changed: 9 additions & 8 deletions
@@ -27,8 +27,8 @@
 TORCH_GITREPO="https://github.com/pytorch/pytorch.git"
 TORCHBENCH_GITREPO="https://github.com/pytorch/benchmark.git"
 TORCHBENCH_DEPS = {
-    "torchtext": os.path.expandvars("${HOME}/text"),
-    "torchvision": os.path.expandvars("${HOME}/vision"),
+    "torchtext": (os.path.expandvars("${HOME}/text"), "main"),
+    "torchvision": (os.path.expandvars("${HOME}/vision"), "main"),
 }

 def exist_dir_path(string):
@@ -152,8 +152,7 @@ def prep(self) -> bool:
     # Update pytorch, torchtext, and torchvision repo
     def update_repos(self):
         repos = [(self.srcpath, "master")]
-        for value in TORCHBENCH_DEPS.values():
-            repos.append((value, "main"))
+        repos.extend(TORCHBENCH_DEPS.values())
         for (repo, branch) in repos:
             gitutils.clean_git_repo(repo)
             assert gitutils.update_git_repo(repo, branch), f"Failed to update {branch} branch of repository {repo}."
@@ -195,23 +194,25 @@ def setup_build_env(self, env) -> Dict[str, str]:
     # Checkout the last commit of dependencies on date
     def checkout_deps(self, cdate: datetime):
         for pkg in TORCHBENCH_DEPS:
-            dep_commit = gitutils.get_git_commit_on_date(TORCHBENCH_DEPS[pkg], cdate)
+            pkg_path, branch = TORCHBENCH_DEPS[pkg]
+            gitutils.checkout_git_branch(pkg_path, branch)
+            dep_commit = gitutils.get_git_commit_on_date(pkg_path, cdate)
             print(f"Checking out {pkg} commit {dep_commit} ...", end="", flush=True)
             assert dep_commit, "Failed to find the commit on {cdate} of {pkg}"
-            assert gitutils.checkout_git_commit(TORCHBENCH_DEPS[pkg], dep_commit), "Failed to checkout commit {commit} of {pkg}"
+            assert gitutils.checkout_git_commit(pkg_path, dep_commit), "Failed to checkout commit {commit} of {pkg}"
             print("done.")

     # Install dependencies such as torchtext and torchvision
     def build_install_deps(self, build_env):
         # Build torchvision
         print(f"Building torchvision ...", end="", flush=True)
         command = "python setup.py install"
-        subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchvision"], env=build_env, shell=True)
+        subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchvision"][0], env=build_env, shell=True)
         print("done")
         # Build torchtext
         print(f"Building torchtext ...", end="", flush=True)
         command = "python setup.py clean install"
-        subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchtext"], env=build_env, shell=True)
+        subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchtext"][0], env=build_env, shell=True)
         print("done")

     def _build_lazy_tensor(self, commit: Commit, build_env: Dict[str, str]):
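With this change each TorchBench dependency is stored as a (path, branch) pair, so the bisector first checks out the branch and then resolves the last commit on or before the bisection date. For readers unfamiliar with that step, here is a minimal sketch of what a helper like gitutils.get_git_commit_on_date presumably does; the actual implementation lives in TorchBench's gitutils module and may differ in details (timezone handling, error reporting, etc.).

```python
# Illustration only, not the real gitutils code.
import subprocess
from datetime import datetime

def last_commit_on_date(repo_path: str, branch: str, cdate: datetime) -> str:
    # `git rev-list -1 --before=<date> <branch>` returns the newest commit on
    # <branch> whose commit date is not later than <date>.
    out = subprocess.check_output(
        ["git", "rev-list", "-1", f"--before={cdate.isoformat()}", branch],
        cwd=repo_path,
    )
    return out.decode("utf-8").strip()
```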

install.py

Lines changed: 2 additions & 2 deletions
@@ -51,9 +51,9 @@ def pip_install_requirements():

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("--continue_on_fail", action="store_true")
-    parser.add_argument("--models", nargs='+', default=[],
+    parser.add_argument("models", nargs='*', default=[],
                         help="Specify one or more models to install. If not set, install all models.")
+    parser.add_argument("--continue_on_fail", action="store_true")
     parser.add_argument("--verbose", "-v", action="store_true")
     args = parser.parse_args()
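The model list is now a positional argument, so the old `--models` flag is gone and running the script with no arguments installs everything. A small sketch of the resulting argparse behavior; the model names below are placeholders, not a statement about which models exist in the repo.

```python
# Same argument layout as the new install.py; model names are illustrative.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("models", nargs='*', default=[],
                    help="Specify one or more models to install. If not set, install all models.")
parser.add_argument("--continue_on_fail", action="store_true")

# `python install.py` -> install all models
print(parser.parse_args([]).models)  # []
# `python install.py resnet50 BERT_pytorch --continue_on_fail` -> install the listed models
print(parser.parse_args(["resnet50", "BERT_pytorch", "--continue_on_fail"]).models)
```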

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+accelerate
 bs4
 patch
 py-cpuinfo

run_e2e.py

Lines changed: 4 additions & 2 deletions
@@ -9,13 +9,15 @@
 SUPPORT_DEVICE_LIST = ["cpu", "cuda"]

 def run(func) -> Dict[str, float]:
-    torch.cuda.synchronize()
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
     result = {}
     # Collect time_ns() instead of time() which does not provide better precision than 1
     # second according to https://docs.python.org/3/library/time.html#time.time.
     t0 = time.time_ns()
     func()
-    torch.cuda.synchronize()
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
     t2 = time.time_ns()
     result["latency_ms"] = (t2 - t0) / 1_000_000.0
     return result
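The guard matters for two reasons: CUDA kernels launch asynchronously, so without a synchronize before reading the clock only the launch cost is measured, and on CPU-only machines the old unconditional torch.cuda.synchronize() call would fail. A self-contained helper in the same spirit (the function name here is illustrative, not part of run_e2e.py):

```python
import time
import torch

def time_callable_ms(func) -> float:
    """Wall-clock latency of func() in milliseconds, safe on CPU-only hosts."""
    # Drain any pending GPU work so t0 marks a clean starting point.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    t0 = time.time_ns()
    func()
    # Wait for the kernels func() launched before stopping the clock;
    # otherwise only the asynchronous launch overhead is timed.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return (time.time_ns() - t0) / 1_000_000.0
```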

run_microbenchmarks.py

Lines changed: 16 additions & 7 deletions
@@ -1,15 +1,24 @@
+import os
 import argparse
-from torchbenchmark.microbenchmarks.nvfuser import run_nvfuser_microbenchmarks
+import importlib
+from typing import List

+CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
+MICROBENCHMARKS_DIR = os.path.join(CURRENT_DIR, "torchbenchmark", "microbenchmarks")

-def run():
-    parser = argparse.ArgumentParser(description="Run nvfuser microbenchmarks")
-    parser.add_argument("--filter", nargs="*", default=[], help='List of benchmarks to test')
-    args, extra_args = parser.parse_known_args()
-    args = parser.parse_args()
+def list_microbenchmarks() -> List[str]:
+    return os.listdir(MICROBENCHMARKS_DIR)

-    run_nvfuser_microbenchmarks(args.filter, extra_args)
+def run():
+    parser = argparse.ArgumentParser(description="Run TorchBench microbenchmarks")
+    parser.add_argument("bm_name", choices=list_microbenchmarks(), help='name of the microbenchmark')
+    args, bm_args = parser.parse_known_args()

+    try:
+        microbenchmark = importlib.import_module(f"torchbenchmark.microbenchmarks.{args.bm_name}")
+        microbenchmark.run(bm_args)
+    except ImportError as e:
+        print(f"Failed to import microbenchmark module {args.bm_name}, error: {str(e)}")

 if __name__ == "__main__":
     run()
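The entry point is now generic: every directory under torchbenchmark/microbenchmarks becomes a selectable benchmark, and the imported module only has to expose a run(args) callable that receives the leftover command-line arguments. A minimal sketch of a conforming module under these assumptions; the package name `my_bench` and its flags are hypothetical.

```python
# torchbenchmark/microbenchmarks/my_bench/__init__.py  (hypothetical example)
# The dispatcher in run_microbenchmarks.py effectively does:
#   importlib.import_module("torchbenchmark.microbenchmarks.my_bench").run(bm_args)
# so the package only needs to expose a `run(args)` entry point.
import argparse
from typing import List

def run(args: List[str]) -> None:
    parser = argparse.ArgumentParser(description="my_bench microbenchmark")
    parser.add_argument("--repeat", type=int, default=10, help="number of iterations")
    opts = parser.parse_args(args)
    for _ in range(opts.repeat):
        pass  # the actual measurement loop would go here
```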

run_sweep.py

Lines changed: 3 additions & 1 deletion
@@ -52,6 +52,7 @@ class ModelTestResult:
     extra_args: List[str]
     status: str
     batch_size: Optional[int]
+    precision: str
     results: Dict[str, Any]

 def _list_model_paths(models: List[str]) -> List[str]:
@@ -80,7 +81,7 @@ def _validate_devices(devices: str) -> List[str]:

 def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool, batch_size: Optional[int], extra_args: List[str]) -> ModelTestResult:
     assert test == "train" or test == "eval", f"Test must be either 'train' or 'eval', but get {test}."
-    result = ModelTestResult(name=model_path.name, test=test, device=device, extra_args=extra_args, batch_size=None,
+    result = ModelTestResult(name=model_path.name, test=test, device=device, extra_args=extra_args, batch_size=None, precision="fp32",
                              status="OK", results={})
     # Run the benchmark test in a separate process
     print(f"Running model {model_path.name} ... ", end='', flush=True)
@@ -96,6 +97,7 @@ def _run_model_test(model_path: pathlib.Path, test: str, device: str, jit: bool,
     task.make_model_instance(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
     # Check the batch size in the model matches the specified value
     result.batch_size = task.get_model_attribute(bs_name)
+    result.precision = task.get_model_attribute("dargs", "precision")
     if batch_size and (not result.batch_size == batch_size):
         raise ValueError(f"User specify batch size {batch_size}, but model {result.name} runs with batch size {result.batch_size}. Please report a bug.")
     result.results["latency_ms"] = run_one_step(task.invoke, device)
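Each sweep record now carries the model's precision next to its batch size, defaulting to "fp32" and overwritten with the value read from the model's dargs. A sketch of the resulting record shape, assuming the fields shown in this diff; the concrete values are made up for illustration.

```python
# Shape of one sweep record after this change; values are illustrative only.
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional

@dataclass
class ModelTestResult:
    name: str
    test: str
    device: str
    extra_args: List[str]
    status: str
    batch_size: Optional[int]
    precision: str
    results: Dict[str, Any] = field(default_factory=dict)

record = ModelTestResult(name="resnet50", test="eval", device="cuda", extra_args=[],
                         status="OK", batch_size=32, precision="fp32",
                         results={"latency_ms": 12.3})
print(asdict(record))
```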

scripts/install_cuda.sh

Lines changed: 5 additions & 5 deletions
@@ -18,14 +18,14 @@ chmod +x "${CUDA_INSTALLER}"
 rm -f "${CUDA_INSTALLER}"
 rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.3 /usr/local/cuda

-# install CUDA 11.3 CuDNN 8.2.0
+# install CUDA 11.3 CuDNN 8.3.2
 # cuDNN download archive: https://developer.nvidia.com/rdp/cudnn-archive
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
 mkdir tmp_cudnn && cd tmp_cudnn
-wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.2.0/cudnn-11.3-linux-x64-v8.2.0.53.tgz -O cudnn-8.2.tgz
-tar xf cudnn-8.2.tgz
-cp -a cuda/include/* /usr/local/cuda/include/
-cp -a cuda/lib64/* /usr/local/cuda/lib64/
+wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz -O cudnn-8.3.2.tar.xz
+tar xJf cudnn-8.3.2.tar.xz
+cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/include/* /usr/local/cuda/include/
+cp -a cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive/lib/* /usr/local/cuda/lib64/
 cd ..
 rm -rf tmp_cudnn
 ldconfig

submodules/FAMBench

Submodule FAMBench added at a0f12ca

test.py

Lines changed: 19 additions & 15 deletions
@@ -14,6 +14,7 @@

 import torch
 from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
+from torchbenchmark.util.metadata_utils import skip_by_metadata


 # Some of the models have very heavyweight setup, so we have to set a very
@@ -24,14 +25,6 @@
 # unresponsive for 5 minutes the parent will presume it dead / incapacitated.)
 TIMEOUT = 300  # Seconds

-# Skip this list of unit tests. One reason may be that the original batch size
-# used in the paper is too large to fit on the CI's GPU.
-EXCLUDELIST = {("densenet121", "train", "cuda"),  # GPU train runs out of memory on CI.
-               ("densenet121", "train", "cpu"),  # CPU train runs for too long on CI.
-               ("densenet121", "example", "cuda"),  # GPU train runs out of memory on CI.
-               ("densenet121", "example", "cpu")}  # CPU train runs for too long on CI.
-
-
 class TestBenchmark(unittest.TestCase):

     def setUp(self):
@@ -51,27 +44,37 @@ def test_fx_profile(self):
         main(["--repeat=1", "--filter=pytorch_struct", "--device=cpu"])
         self.assertGreaterEqual(mock_save.call_count, 1)

+def _create_example_model_instance(task: ModelTask, device: str):
+    skip = False
+    try:
+        task.make_model_instance(test="eval", device=device, jit=False)
+    except NotImplementedError:
+        try:
+            task.make_model_instance(test="train", device=device, jit=False)
+        except NotImplementedError:
+            skip = True
+    finally:
+        if skip:
+            raise NotImplementedError(f"Model is not implemented on the device {device}")

 def _load_test(path, device):

     def example_fn(self):
         task = ModelTask(path, timeout=TIMEOUT)
         with task.watch_cuda_memory(skip=(device != "cuda"), assert_equal=self.assertEqual):
             try:
-                task.make_model_instance(test="eval", device=device, jit=False)
+                _create_example_model_instance(task, device)
                 task.check_example()
                 task.del_model_instance()
-
             except NotImplementedError:
-                self.skipTest('Method get_module is not implemented, skipping...')
+                self.skipTest(f'Method `get_module()` on {device} is not implemented, skipping...')

     def train_fn(self):
         metadata = get_metadata_from_yaml(path)
         task = ModelTask(path, timeout=TIMEOUT)
         with task.watch_cuda_memory(skip=(device != "cuda"), assert_equal=self.assertEqual):
             try:
                 task.make_model_instance(test="train", device=device, jit=False)
-                task.set_train()
                 task.invoke()
                 task.check_details_train(device=device, md=metadata)
                 task.del_model_instance()
@@ -84,8 +87,6 @@ def eval_fn(self):
         with task.watch_cuda_memory(skip=(device != "cuda"), assert_equal=self.assertEqual):
             try:
                 task.make_model_instance(test="eval", device=device, jit=False)
-
-                task.set_eval()
                 task.invoke()
                 task.check_details_eval(device=device, md=metadata)
                 task.check_eval_output()
@@ -104,10 +105,13 @@ def check_device_fn(self):
             self.skipTest(f'Method check_device on {device} is not implemented, skipping...')

     name = os.path.basename(path)
+    metadata = get_metadata_from_yaml(path)
     for fn, fn_name in zip([example_fn, train_fn, eval_fn, check_device_fn],
                            ["example", "train", "eval", "check_device"]):
+        # set exclude list based on metadata
         setattr(TestBenchmark, f'test_{name}_{fn_name}_{device}',
-                (unittest.skipIf((name, fn_name, device) in EXCLUDELIST, "This test is on the EXCLUDELIST")(fn)))
+                (unittest.skipIf(skip_by_metadata(test=fn_name, device=device,\
+                    jit=False, extra_args=[], metadata=metadata), "This test is skipped by its metadata")(fn)))


 def _load_tests():

test_bench.py

Lines changed: 8 additions & 4 deletions
@@ -15,9 +15,9 @@
 import pytest
 import time
 from components._impl.workers import subprocess_worker
-from torchbenchmark import _list_model_paths, ModelTask
+from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
 from torchbenchmark.util.machine_config import get_machine_state
-
+from torchbenchmark.util.metadata_utils import skip_by_metadata

 def pytest_generate_tests(metafunc):
     # This is where the list of models to test can be configured
@@ -48,12 +48,14 @@ class TestBenchNetwork:

     def test_train(self, model_path, device, compiler, benchmark):
         try:
+            if skip_by_metadata(test="train", device=device, jit=(compiler == 'jit'), \
+                extra_args=[], metadata=get_metadata_from_yaml(model_path)):
+                raise NotImplementedError("Test skipped by its metadata.")
             task = ModelTask(model_path)
             if not task.model_details.exists:
                 return  # Model is not supported.

             task.make_model_instance(test="train", device=device, jit=(compiler == 'jit'))
-            task.set_train()
             benchmark(task.invoke)
             benchmark.extra_info['machine_state'] = get_machine_state()

@@ -62,14 +64,16 @@ def test_train(self, model_path, device, compiler, benchmark):

     def test_eval(self, model_path, device, compiler, benchmark, pytestconfig):
         try:
+            if skip_by_metadata(test="eval", device=device, jit=(compiler == 'jit'), \
+                extra_args=[], metadata=get_metadata_from_yaml(model_path)):
+                raise NotImplementedError("Test skipped by its metadata.")
             task = ModelTask(model_path)
             if not task.model_details.exists:
                 return  # Model is not supported.

             task.make_model_instance(test="eval", device=device, jit=(compiler == 'jit'))

             with task.no_grad(disable_nograd=pytestconfig.getoption("disable_nograd")):
-                task.set_eval()
                 benchmark(task.invoke)
                 benchmark.extra_info['machine_state'] = get_machine_state()
                 if pytestconfig.getoption("check_opt_vs_noopt_jit"):
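Across test.py and test_bench.py the hard-coded EXCLUDELIST is replaced by skip decisions derived from each model's metadata via skip_by_metadata. A rough sketch of how such a predicate could be structured; the metadata schema shown here (a `not_implemented` list whose entries may pin `test`, `device`, or `jit`) is an assumption for illustration, not necessarily the actual torchbenchmark format.

```python
# Hypothetical sketch of a metadata-driven skip predicate. The real
# skip_by_metadata lives in torchbenchmark/util/metadata_utils.py and its
# metadata schema may differ from the one assumed below.
from typing import Any, Dict, List

def skip_by_metadata_sketch(test: str, device: str, jit: bool,
                            extra_args: List[str], metadata: Dict[str, Any]) -> bool:
    # A rule matches when every key it specifies (test, device, jit) equals the
    # current configuration; unspecified keys act as wildcards.
    for rule in metadata.get("not_implemented", []):
        if rule.get("test", test) == test and \
           rule.get("device", device) == device and \
           rule.get("jit", jit) == jit:
            return True
    return False

# Example: skip densenet121 training on CUDA (runs out of memory on CI).
md = {"not_implemented": [{"test": "train", "device": "cuda"}]}
assert skip_by_metadata_sketch("train", "cuda", False, [], md) is True
assert skip_by_metadata_sketch("eval", "cuda", False, [], md) is False
```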