feat: Support weight-stripped engine and REFIT_IDENTICAL flag #3167

Merged
merged 59 commits on Dec 12, 2024
Commits (59)
40349a8
support weight-stripped engine and REFIT_IDENTICAL flag
zewenli98 Sep 19, 2024
5d7c677
refactor with new design
zewenli98 Sep 20, 2024
82b7ddc
lint
zewenli98 Oct 1, 2024
9f6a771
small fix
zewenli98 Oct 1, 2024
7ea3c0f
remove make_refittable
zewenli98 Oct 1, 2024
bf7553b
fast refit -> slow refit
zewenli98 Oct 2, 2024
46e9bc8
fix np.bool_, group_norm
zewenli98 Oct 2, 2024
d783fdd
add immutable_weights
zewenli98 Oct 2, 2024
160588e
skip engine caching for non-refittable engines, slow refit -> fast refit
zewenli98 Oct 2, 2024
493f981
refactored, there are 3 types of engines
zewenli98 Oct 5, 2024
f204104
fix and add tests
zewenli98 Oct 5, 2024
4663c83
fix issues #3206 #3217
zewenli98 Oct 8, 2024
c57ab06
small fix
zewenli98 Oct 15, 2024
402c9b0
resolve comments
zewenli98 Oct 15, 2024
d8e59da
WIP: cache weight-stripped engine
zewenli98 Oct 22, 2024
e8811fd
Merge branch 'main' into weight_stripped_engine
zewenli98 Oct 31, 2024
f2e3f00
redesigned hash func and add constant mapping to fast refit
zewenli98 Nov 4, 2024
31af308
refactor and add tests
zewenli98 Nov 6, 2024
1ae33f4
Merge branch 'main' into weight_stripped_engine
zewenli98 Nov 6, 2024
90bf679
update
zewenli98 Nov 6, 2024
a8a34f6
increase ENGINE_CACHE_SIZE
zewenli98 Nov 6, 2024
285bc90
skip some tests
zewenli98 Nov 7, 2024
2d152cf
fix tests
zewenli98 Nov 7, 2024
d461608
try fixing cumsum
zewenli98 Nov 8, 2024
d57b885
Merge branch 'main' into weight_stripped_engine
zewenli98 Nov 8, 2024
23d68d5
fix windows cross compile, TODO: whether windows support stripping en…
zewenli98 Nov 8, 2024
a928f67
CI debug test 1
zewenli98 Nov 13, 2024
02625ca
CI debug test 2
zewenli98 Nov 14, 2024
c462e40
CI debug test 3
zewenli98 Nov 16, 2024
9ba33b5
Merge branch 'main' into weight_stripped_engine
Nov 19, 2024
3d68039
reduce -n to 4 for converter tests on CI
zewenli98 Nov 20, 2024
2e7ef3b
reduce -n to 4 for converter tests on CI
zewenli98 Nov 20, 2024
9ff165c
simplify test_different_args_dont_share_cached_engine
zewenli98 Nov 22, 2024
8ca8e2d
reduce -n to 2
zewenli98 Nov 22, 2024
f9f2a70
reduce -n to 1
zewenli98 Nov 22, 2024
c69c61a
revert -n back to 4 and chunk converter
zewenli98 Nov 23, 2024
05b560d
change to opt-in feature
zewenli98 Nov 28, 2024
7feea97
fix conflict
zewenli98 Nov 28, 2024
d1521c3
fix typo
zewenli98 Nov 28, 2024
5a193a2
Merge branch 'main' into weight_stripped_engine
Nov 29, 2024
0b345be
small fix
zewenli98 Dec 3, 2024
6754481
Merge branch 'main' into weight_stripped_engine
zewenli98 Dec 6, 2024
4a7e957
update to manylinux2_28-builder
zewenli98 Dec 10, 2024
6e840ba
remove cuda12.6 tests
zewenli98 Dec 10, 2024
9a8473a
remove one_user_validator for native_layer_norm
zewenli98 Dec 10, 2024
6a07767
clear tests
zewenli98 Dec 10, 2024
ed3424a
remove the whole chunk
zewenli98 Dec 10, 2024
ef54239
add cuda12.6 back and export D_GLIBCXX_USE_CXX11_ABI=1
zewenli98 Dec 10, 2024
f166562
fix env
zewenli98 Dec 10, 2024
80aae71
fix container
zewenli98 Dec 10, 2024
676c9ce
fix env
zewenli98 Dec 11, 2024
bf2edc6
fix env
zewenli98 Dec 11, 2024
627d510
fix env
zewenli98 Dec 11, 2024
b393b6f
fix env
zewenli98 Dec 11, 2024
78d72b6
fix env
zewenli98 Dec 11, 2024
a5d3c18
export USE_CXX11_ABI=1 for cuda12.6
zewenli98 Dec 11, 2024
4f02da8
remove chunk
zewenli98 Dec 11, 2024
7d7423a
resolve comments
zewenli98 Dec 12, 2024
9f76304
Merge branch 'main' into weight_stripped_engine
zewenli98 Dec 12, 2024
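Taken together, the commits above ("remove make_refittable", "add immutable_weights", "change to opt-in feature", "refactored, there are 3 types of engines") replace the opt-out `make_refittable=True` flag with the opt-in `immutable_weights=False` setting and distinguish three kinds of engines. A minimal sketch of that decision logic, with illustrative names only — `strip_engine_weights` and the returned labels are stand-ins, not the exact Torch-TensorRT API:

```python
# Hedged sketch of the flag semantics this PR introduces. The helper and
# its parameter names are illustrative, not the Torch-TensorRT implementation.
def engine_kind(immutable_weights=True, strip_engine_weights=False):
    if strip_engine_weights:
        # weight-stripped engines are built with the REFIT_IDENTICAL flag
        # and must be refitted with the original weights before use
        return "weight-stripped"
    if not immutable_weights:
        # weights can be swapped after build (fast/slow refit)
        return "refittable"
    # the new default: refit is an opt-in feature
    return "immutable"

print(engine_kind())                                                     # immutable
print(engine_kind(immutable_weights=False))                              # refittable
print(engine_kind(immutable_weights=False, strip_engine_weights=True))   # weight-stripped
```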
10 changes: 5 additions & 5 deletions .github/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
@@ -152,33 +152,33 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
"12.4": "pytorch/manylinux2_28-builder:cuda12.4",
"12.6": "pytorch/manylinux2_28-builder:cuda12.6",
**{
gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}"
gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
},
CPU: "pytorch/manylinux-builder:cpu",
CPU: "pytorch/manylinux2_28-builder:cpu",
XPU: "pytorch/manylinux2_28-builder:xpu",
# TODO: Migrate CUDA_AARCH64 image to manylinux2_28_aarch64-builder:cuda12.4
CPU_AARCH64: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64",
CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.4",
}
LIBTORCH_CONTAINER_IMAGES = {
**{
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}"
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:cuda{gpu_arch}"
for gpu_arch in CUDA_ARCHES
},
**{
(gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
for gpu_arch in CUDA_ARCHES
},
**{
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:rocm{gpu_arch}"
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
},
**{
(gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
},
(CPU, PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu",
(CPU, PRE_CXX11_ABI): "pytorch/manylinux2_28-builder:cpu",
(CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
}

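The hunk above swaps the remaining `manylinux-builder` images for `manylinux2_28-builder`. The resulting (arch, ABI) → image lookup can be sketched self-contained; the architecture lists and ABI labels below are illustrative stand-ins for the real constants in `generate_binary_build_matrix.py`:

```python
# Illustrative reproduction of the post-change LIBTORCH_CONTAINER_IMAGES
# mapping; the arch lists are examples, not the values used in CI.
CUDA_ARCHES = ["11.8", "12.1", "12.4"]   # assumed for illustration
ROCM_ARCHES = ["6.1", "6.2"]             # assumed for illustration
PRE_CXX11_ABI, CXX11_ABI, CPU = "pre-cxx11", "cxx11-abi", "cpu"

LIBTORCH_CONTAINER_IMAGES = {
    **{(a, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:cuda{a}" for a in CUDA_ARCHES},
    **{(a, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{a}" for a in CUDA_ARCHES},
    **{(a, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:rocm{a}" for a in ROCM_ARCHES},
    **{(a, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{a}" for a in ROCM_ARCHES},
    (CPU, PRE_CXX11_ABI): "pytorch/manylinux2_28-builder:cpu",
    (CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
}

print(LIBTORCH_CONTAINER_IMAGES[("12.4", PRE_CXX11_ABI)])
# pytorch/manylinux2_28-builder:cuda12.4
```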
2 changes: 1 addition & 1 deletion .github/workflows/build-test-linux.yml
@@ -137,7 +137,7 @@ jobs:
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
popd

tests-py-dynamo-fe:
4 changes: 2 additions & 2 deletions .github/workflows/build-test-tensorrt-linux.yml
@@ -129,7 +129,7 @@ jobs:
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
popd

tests-py-dynamo-fe:
@@ -314,4 +314,4 @@ jobs:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
cancel-in-progress: true
cancel-in-progress: true
4 changes: 2 additions & 2 deletions .github/workflows/build-test-tensorrt-windows.yml
@@ -132,7 +132,7 @@ jobs:
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
popd

tests-py-dynamo-fe:
@@ -298,4 +298,4 @@ jobs:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
cancel-in-progress: true
cancel-in-progress: true
2 changes: 1 addition & 1 deletion .github/workflows/build-test-windows.yml
@@ -119,7 +119,7 @@ jobs:
export CI_BUILD=1
pushd .
cd tests/py/dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
popd

tests-py-dynamo-fe:
2 changes: 1 addition & 1 deletion examples/dynamo/engine_caching_bert_example.py
@@ -52,7 +52,7 @@ def compile_bert(iterations=3):
"truncate_double": True,
"debug": False,
"min_block_size": 1,
"make_refittable": True,
"immutable_weights": False,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"engine_cache_dir": "/tmp/torch_trt_bert_engine_cache",
8 changes: 4 additions & 4 deletions examples/dynamo/engine_caching_example.py
@@ -63,7 +63,7 @@ def remove_timing_cache(path=TIMING_CACHE_PATH):
# in a subsequent compilation, either as part of this session or a new session, the cache will
# pull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude.
# As such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``),
# the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine_example` for more details.
# the engine must be refittable (``immutable_weights=False``). See :ref:`refit_engine_example` for more details.


def torch_compile(iterations=3):
@@ -97,7 +97,7 @@ def torch_compile(iterations=3):
"enabled_precisions": enabled_precisions,
"debug": debug,
"min_block_size": min_block_size,
"make_refittable": True,
"immutable_weights": False,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
},
@@ -157,7 +157,7 @@ def dynamo_compile(iterations=3):
enabled_precisions=enabled_precisions,
debug=debug,
min_block_size=min_block_size,
make_refittable=True,
immutable_weights=False,
cache_built_engines=cache_built_engines,
reuse_cached_engines=reuse_cached_engines,
engine_cache_size=1 << 30, # 1GB
@@ -268,7 +268,7 @@ def torch_compile_my_cache(iterations=3):
"enabled_precisions": enabled_precisions,
"debug": debug,
"min_block_size": min_block_size,
"make_refittable": True,
"immutable_weights": False,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"custom_engine_cache": engine_cache,
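The caching contract described in the comment above — only refittable engines (`immutable_weights=False`) are inserted, and a cache hit pulls the built engine and refits it with the current weights — can be illustrated with a toy cache. `ToyEngineCache` is a stand-in for exposition, not the Torch-TensorRT engine cache:

```python
# Toy model of the engine-cache contract; not a Torch-TensorRT API.
class ToyEngineCache:
    def __init__(self):
        self._store = {}

    def insert(self, key, engine, immutable_weights):
        if immutable_weights:
            # mirrors "skip engine caching for non-refittable engines":
            # an immutable-weight engine cannot be refitted on a later hit
            return False
        self._store[key] = engine
        return True

    def lookup(self, key, weights):
        engine = self._store.get(key)
        if engine is None:
            return None
        # on a hit, the cached engine is refitted with the caller's weights
        # instead of being rebuilt from scratch
        return {"engine": engine, "refitted_with": weights}

cache = ToyEngineCache()
assert not cache.insert("graph-1", "engine-bytes", immutable_weights=True)
assert cache.insert("graph-1", "engine-bytes", immutable_weights=False)
print(cache.lookup("graph-1", {"w": 1}))
```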
4 changes: 2 additions & 2 deletions examples/dynamo/mutable_torchtrt_module_example.py
@@ -31,7 +31,7 @@
settings = {
"use_python": False,
"enabled_precisions": {torch.float32},
"make_refittable": True,
"immutable_weights": False,
}

model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -80,7 +80,7 @@
"use_python_runtime": True,
"enabled_precisions": {torch.float16},
"debug": True,
"make_refittable": True,
"immutable_weights": False,
}

model_id = "runwayml/stable-diffusion-v1-5"
4 changes: 2 additions & 2 deletions examples/dynamo/refit_engine_example.py
@@ -47,7 +47,7 @@
# ---------------------------------------
#
# The initial step is to compile a module and save it as normal. Note that there is an
# additional parameter `make_refittable` that is set to `True`. This parameter is used to
# additional parameter `immutable_weights` that is set to `False`. This parameter is used to
# indicate that the engine being built should support weight refitting later. Engines built without
# these settings will not be able to be refit.
#
@@ -69,7 +69,7 @@
debug=debug,
min_block_size=min_block_size,
torch_executed_ops=torch_executed_ops,
make_refittable=True,
immutable_weights=False,
reuse_cached_engines=False,
) # Output is a torch.fx.GraphModule

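The refit workflow this example describes — compile once with `immutable_weights=False`, then swap weights without rebuilding — reduces to the following toy model (`ToyEngine` is illustrative, not the real engine class):

```python
# Toy illustration of weight refitting: nothing is rebuilt, weights are swapped.
class ToyEngine:
    def __init__(self, weights, refittable):
        self.weights = dict(weights)
        # refittable corresponds to compiling with immutable_weights=False
        self.refittable = refittable

    def refit(self, new_weights):
        if not self.refittable:
            raise RuntimeError("engine was built with immutable weights")
        self.weights.update(new_weights)

engine = ToyEngine({"fc": 1.0}, refittable=True)
engine.refit({"fc": 2.0})
print(engine.weights["fc"])  # 2.0

frozen = ToyEngine({"fc": 1.0}, refittable=False)
try:
    frozen.refit({"fc": 3.0})
except RuntimeError as err:
    print("refit refused:", err)
```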
2 changes: 1 addition & 1 deletion py/ci/Dockerfile.ci
@@ -1,4 +1,4 @@
FROM pytorch/manylinux-builder:cuda12.4
FROM pytorch/manylinux2_28-builder:cuda12.6

RUN yum install -y ninja-build

4 changes: 2 additions & 2 deletions py/torch_tensorrt/_enums.py
@@ -220,7 +220,7 @@ def _from(
return dtype.f32
elif t == np.float64:
return dtype.f64
elif t == np.bool:
elif t == np.bool_:
return dtype.b
# TODO: Consider using ml_dtypes when issues like this are resolved:
# https://github.com/pytorch/pytorch/issues/109873
@@ -1384,7 +1384,7 @@ def current_platform(cls) -> Platform:
def __str__(self) -> str:
return str(self.name)

@needs_torch_tensorrt_runtime
@needs_torch_tensorrt_runtime # type: ignore
def _to_serialized_rt_platform(self) -> str:
val: str = torch.ops.tensorrt._platform_unknown()

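The `_enums.py` hunk fixes the `np.bool` comparison: the bare alias was deprecated in NumPy 1.20 and removed in 1.24, so `t == np.bool` raised `AttributeError` on recent NumPy; `np.bool_` is the stable scalar boolean type. A small sketch of the corrected check, assuming NumPy is installed — `to_trt_kind` is an illustrative stand-in for `dtype._from`, not the real method:

```python
import numpy as np

# Hypothetical mapping in the spirit of the fixed dtype._from branch;
# only the np.bool_ comparison mirrors the actual change.
def to_trt_kind(t):
    if t == np.float32:
        return "f32"
    if t == np.float64:
        return "f64"
    if t == np.bool_:   # was `np.bool`, which no longer exists on NumPy 1.24+
        return "b"
    raise TypeError(f"unsupported dtype: {t!r}")

print(to_trt_kind(np.bool_))  # b
```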