From 5bcca3877c3122aa1ebefef176de9086421a7e22 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Thu, 2 May 2024 23:44:04 +0800 Subject: [PATCH 01/31] feat: C++ runtime on Windows --- .github/workflows/build-test-windows.yml | 106 ++++++++++++++++-- packaging/pre_build_script_windows.sh | 21 +++- packaging/vc_env_helper.bat | 37 ++++++ py/torch_tensorrt/_features.py | 20 +++- setup.py | 7 +- third_party/tensorrt/local/BUILD | 11 +- .../ci_workspaces/WORKSPACE.win.release.tmpl | 19 ++-- 7 files changed, 185 insertions(+), 36 deletions(-) create mode 100644 packaging/vc_env_helper.bat diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 7aae60c5fa..36cea51d94 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -35,6 +35,7 @@ jobs: include: - repository: pytorch/tensorrt pre-script: packaging/pre_build_script_windows.sh + env-script: packaging/vc_env_helper.bat smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package @@ -46,10 +47,45 @@ jobs: test-infra-ref: main build-matrix: ${{ needs.generate-matrix.outputs.matrix }} pre-script: ${{ matrix.pre-script }} + env-script: ${{ matrix.env-script }} smoke-test-script: ${{ matrix.smoke-test-script }} package-name: ${{ matrix.package-name }} trigger-event: ${{ github.event_name }} + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + with: + job-name: tests-py-torchscript-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . + cd tests/modules + # Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now. + ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers timm pybind11==2.6.2 + ${CONDA_RUN} python hub.py + popd + pushd . + cd tests/py/ts + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + tests-py-dynamo-converters: name: Test dynamo converters [Python] needs: [generate-matrix, build] @@ -59,10 +95,10 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main with: job-name: tests-py-dynamo-converters - repository: "pytorch/tensorrt" + repository: ${{ matrix.repository }} ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main @@ -84,10 +120,10 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main with: job-name: tests-py-dynamo-fe - repository: "pytorch/tensorrt" + repository: ${{ matrix.repository }} ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main @@ -101,6 +137,32 @@ jobs: ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py popd + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + with: + job-name: tests-py-dynamo-serde + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . + cd tests/py/dynamo + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + popd + tests-py-torch-compile-be: name: Test torch compile backend [Python] needs: [generate-matrix, build] @@ -110,15 +172,15 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main with: job-name: tests-py-torch-compile-be repository: ${{ matrix.repository }} ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - pre-script: packaging/driver_upgrade.bat build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat script: | export USE_HOST_DEPS=1 pushd . @@ -136,10 +198,10 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main with: job-name: tests-py-dynamo-core - repository: "pytorch/tensorrt" + repository: ${{ matrix.repository }} ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main @@ -154,6 +216,32 @@ jobs: ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ popd + tests-py-core: + name: Test core [Python] + needs: [generate-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + with: + job-name: tests-py-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + script: | + export USE_HOST_DEPS=1 + pushd . + cd tests/py/core + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + popd + concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file + cancel-in-progress: true diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index dd768b8662..43c6d6999d 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -1,12 +1,27 @@ -python -m pip install -U numpy packaging pyyaml setuptools wheel +set -eou pipefail -# Install TRT from PyPi -TRT_VERSION=$(${CONDA_RUN} python -c "import yaml; print(yaml.safe_load(open('dev_dep_versions.yml', 'r'))['__tensorrt_version__'])") +python -m pip install -U numpy packaging pyyaml setuptools wheel +# Install TRT from PyPI +TRT_VERSION=$(python -c "import yaml; print(yaml.safe_load(open('dev_dep_versions.yml', 'r'))['__tensorrt_version__'])") python -m pip install tensorrt==${TRT_VERSION} tensorrt-${CU_VERSION::4}==${TRT_VERSION} tensorrt-${CU_VERSION::4}-bindings==${TRT_VERSION} tensorrt-${CU_VERSION::4}-libs==${TRT_VERSION} --extra-index-url https://pypi.nvidia.com choco install bazelisk -y +if [ ${CU_VERSION} = cu118 ]; then + TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-11.8.zip +elif [ ${CU_VERSION} = cu121 ]; then + TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip +else + echo "Unsupported CU_VERSION" + exit 1 +fi + +curl -Lo TensorRT.zip ${TRT_DOWNLOAD_LINK} +unzip TensorRT.zip -d C:/ + +export CUDA_HOME="$(echo ${CUDA_PATH} | sed -e 's#\\#\/#g')" + cat toolchains/ci_workspaces/WORKSPACE.win.release.tmpl | envsubst > WORKSPACE echo "RELEASE=1" >> ${GITHUB_ENV} diff --git a/packaging/vc_env_helper.bat b/packaging/vc_env_helper.bat new file mode 100644 index 0000000000..79c81861f6 --- /dev/null +++ b/packaging/vc_env_helper.bat @@ -0,0 +1,37 @@ +@echo on + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "BAZEL_VC=%%i\VC" + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index dde99cbaf6..02e2108591 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -1,4 +1,5 @@ import os +import sys from collections import namedtuple from torch_tensorrt._utils import sanitized_torch_version @@ -15,10 +16,23 @@ ], ) -_TS_FE_AVAIL = os.path.isfile(os.path.dirname(__file__) + "/lib/libtorchtrt.so") -_TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile( - os.path.dirname(__file__) + "/lib/libtorchtrt_runtime.so" +trtorch_dir = os.path.dirname(__file__) +linked_file = os.path.join( + "lib", "torchtrt.dll" if sys.platform.startswith("win") else "libtorchtrt.so" ) +linked_file_runtime = os.path.join( + "lib", + ( + "torchtrt_runtime.dll" + if sys.platform.startswith("win") + else "libtorchtrt_runtime.so" + ), +) +linked_file_full_path = os.path.join(trtorch_dir, linked_file) +linked_file_runtime_full_path = os.path.join(trtorch_dir, linked_file_runtime) + +_TS_FE_AVAIL = os.path.isfile(linked_file_full_path) +_TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") _FX_FE_AVAIL = True diff --git a/setup.py b/setup.py index 7e30847480..4a67908a29 100644 --- a/setup.py +++ b/setup.py @@ -79,8 +79,7 @@ def load_dep_info(): CXX11_ABI = False JETPACK_VERSION = None -# TODO: Remove once C++ Runtime is integrated in Windows -PY_ONLY = IS_WINDOWS +PY_ONLY = False NO_TS = False LEGACY = False RELEASE = False @@ -238,7 +237,7 @@ def copy_libtorchtrt(multilinux=False, rt_only=False): if IS_WINDOWS: copyfile( dir_path + "/../bazel-bin/cpp/lib/torchtrt.dll", - dir_path + "/torch_tensorrt/torchtrt.dll", + dir_path + "/torch_tensorrt/lib/torchtrt.dll", ) copyfile( dir_path + "/../bazel-bin/cpp/lib/torchtrt.dll.if.lib", @@ -379,7 +378,6 @@ class CleanCommand(Command): ] PY_CLEAN_FILES = [ os.path.join(".", "torch_tensorrt", "*.so"), - os.path.join(".", "torch_tensorrt", "*.dll"), os.path.join(".", "torch_tensorrt", "_version.py"), os.path.join(".", "torch_tensorrt", "BUILD"), os.path.join(".", "torch_tensorrt", "WORKSPACE"), @@ -584,7 +582,6 @@ def run(self): "include/torch_tensorrt/core/util/logging/*.h", "bin/*", "lib/*", - "*.dll", ] } ) diff --git a/third_party/tensorrt/local/BUILD b/third_party/tensorrt/local/BUILD index ef9ff7b956..a755ecf862 100644 --- a/third_party/tensorrt/local/BUILD +++ b/third_party/tensorrt/local/BUILD @@ -80,7 +80,7 @@ cc_import( static_library = select({ ":aarch64_linux": "lib/aarch64-linux-gnu/libnvinfer_static.a", ":ci_rhel_x86_64_linux": "lib64/libnvinfer_static.a", - ":windows": "lib/nvinfer.lib", + ":windows": "lib/nvinfer_10.lib", "//conditions:default": "lib/x86_64-linux-gnu/libnvinfer_static.a", }), visibility = ["//visibility:private"], @@ -91,7 +91,7 @@ cc_import( shared_library = select({ ":aarch64_linux": "lib/aarch64-linux-gnu/libnvinfer.so", ":ci_rhel_x86_64_linux": "lib64/libnvinfer.so", - ":windows": "lib/nvinfer.dll", + ":windows": "lib/nvinfer_10.dll", "//conditions:default": "lib/x86_64-linux-gnu/libnvinfer.so", }), visibility = ["//visibility:private"], @@ -104,7 +104,10 @@ cc_library( "nvinfer_headers", "nvinfer_lib", "@cuda//:cudart", - ], + ] + select({ + ":windows": ["nvinfer_static_lib"], + "//conditions:default": [], + }), ) #################################################################################### @@ -330,7 +333,7 @@ cc_library( srcs = select({ ":aarch64_linux": ["lib/aarch64-linux-gnu/libnvinfer_plugin.so"], ":ci_rhel_x86_64_linux": ["lib64/libnvinfer_plugin.so"], - ":windows": ["lib/nvinfer_plugin.lib"], + ":windows": ["lib/nvinfer_plugin_10.lib"], "//conditions:default": ["lib/x86_64-linux-gnu/libnvinfer_plugin.so"], }), hdrs = select({ diff --git a/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl b/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl index 01bbd2130e..40ebf12494 100644 --- a/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl +++ b/toolchains/ci_workspaces/WORKSPACE.win.release.tmpl @@ -36,7 +36,7 @@ http_archive( # External dependency for torch_tensorrt if you already have precompiled binaries. local_repository( name = "torch_tensorrt", - path = "/opt/circleci/.pyenv/versions/3.10.9/lib/python3.10/site-packages/torch_tensorrt" + path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt", ) # CUDA should be installed on the system locally @@ -46,11 +46,6 @@ new_local_repository( path = "${CUDA_HOME}", ) -new_local_repository( - name = "cublas", - build_file = "@//third_party/cublas:BUILD", - path = "C:/", -) ############################################################################################################# # Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs) ############################################################################################################# @@ -59,14 +54,14 @@ http_archive( name = "libtorch", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-cxx11-abi-shared-with-deps-latest.zip"], + urls = ["https://download.pytorch.org/libtorch/${CHANNEL}/${CU_VERSION}/libtorch-win-shared-with-deps-latest.zip"], ) http_archive( name = "libtorch_pre_cxx11_abi", build_file = "@//third_party/libtorch:BUILD", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/nightly/cu121/libtorch-shared-with-deps-latest.zip"], + urls = ["https://download.pytorch.org/libtorch/${CHANNEL}/${CU_VERSION}/libtorch-win-shared-with-deps-latest.zip"], ) #################################################################################### @@ -75,13 +70,13 @@ http_archive( new_local_repository( name = "tensorrt", - path = "C:/", + path = "C:/TensorRT-10.0.1.6", build_file = "@//third_party/tensorrt/local:BUILD" ) -# ######################################################################### -# # Testing Dependencies (optional - comment out on aarch64) -# ######################################################################### +######################################################################### +# Development Dependencies (optional - comment out on aarch64) +######################################################################### load("@rules_python//python:pip.bzl", "pip_parse") From c6de5459abd16fef2f0724c6a5bf350936566ab0 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 3 May 2024 07:50:03 +0800 Subject: [PATCH 02/31] Add cu124 for TRT_DOWNLOAD_LINK --- packaging/pre_build_script_windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 43c6d6999d..4a184f9b61 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -10,7 +10,7 @@ choco install bazelisk -y if [ ${CU_VERSION} = cu118 ]; then TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-11.8.zip -elif [ ${CU_VERSION} = cu121 ]; then +elif [ ${CU_VERSION} = cu121 ] || [ ${CU_VERSION} = cu124 ]; then TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip else echo "Unsupported CU_VERSION" From 63c7e76c594191285a92898959edd08d42d5ca96 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 3 May 2024 22:16:20 +0800 Subject: [PATCH 03/31] Update vc_env_helper.bat --- packaging/vc_env_helper.bat | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packaging/vc_env_helper.bat b/packaging/vc_env_helper.bat index 79c81861f6..33605856b7 100644 --- a/packaging/vc_env_helper.bat +++ b/packaging/vc_env_helper.bat @@ -1,6 +1,9 @@ @echo on -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -property installationPath`) do ( +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( set "BAZEL_VC=%%i\VC" set "VS15INSTALLDIR=%%i" From 822265ab39e1ef2345b38934a5de2cabc325bf7f Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 00:10:35 +0800 Subject: [PATCH 04/31] Add wheel-build-params --- .github/workflows/build-test-windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 36cea51d94..f5f3a8bd17 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -36,6 +36,7 @@ jobs: - repository: pytorch/tensorrt pre-script: packaging/pre_build_script_windows.sh env-script: packaging/vc_env_helper.bat + wheel-build-params: "--release" smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package @@ -48,6 +49,7 @@ jobs: build-matrix: ${{ needs.generate-matrix.outputs.matrix }} pre-script: ${{ matrix.pre-script }} env-script: ${{ matrix.env-script }} + wheel-build-params: ${{ matrix.wheel-build-params }} smoke-test-script: ${{ matrix.smoke-test-script }} package-name: ${{ matrix.package-name }} trigger-event: ${{ github.event_name }} From 0ef76e31bd5783af0d883c9feb7dbf51a4dc14e3 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 02:21:33 +0800 Subject: [PATCH 05/31] Migrate to G5 instance in test --- .github/workflows/windows-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index aa7d461e14..4a5c51c34c 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -54,7 +54,7 @@ jobs: SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} - runs-on: windows.8xlarge.nvidia.gpu.nonephemeral + runs-on: windows.g5.4xlarge.nvidia.gpu defaults: run: shell: bash -l {0} From 4b6f80664773fae61cd475d0541dd2c2e6d041e6 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 13:02:21 +0800 Subject: [PATCH 06/31] Use local file to actually run on G5 instance before merging --- .github/workflows/build-test-windows.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index f5f3a8bd17..be0f671dd4 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -63,7 +63,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-torchscript-fe repository: ${{ matrix.repository }} @@ -97,7 +97,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-dynamo-converters repository: ${{ matrix.repository }} @@ -122,7 +122,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-dynamo-fe repository: ${{ matrix.repository }} @@ -148,7 +148,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-dynamo-serde repository: ${{ matrix.repository }} @@ -174,7 +174,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-torch-compile-be repository: ${{ matrix.repository }} @@ -200,7 +200,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-dynamo-core repository: ${{ matrix.repository }} @@ -227,7 +227,7 @@ jobs: include: - repository: pytorch/tensorrt package-name: torch_tensorrt - uses: pytorch/tensorrt/.github/workflows/windows-test.yml@main + uses: ./.github/workflows/windows-test.yml with: job-name: tests-py-core repository: ${{ matrix.repository }} From e238acf8182ed0752506b13824a95b30b53319f5 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 13:10:04 +0800 Subject: [PATCH 07/31] Migrate to G5 instance in build --- .github/workflows/build-test-windows.yml | 2 +- .github/workflows/build_wheels_windows.yml | 220 +++++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build_wheels_windows.yml diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index be0f671dd4..877a0a82fb 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -40,7 +40,7 @@ jobs: smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main + uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml new file mode 100644 index 0000000000..92d94b043c --- /dev/null +++ b/.github/workflows/build_wheels_windows.yml @@ -0,0 +1,220 @@ +name: Build Windows Wheels + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + env-script: + description: "Script to setup environment variables for the build" + default: "" + type: string + wheel-build-params: + description: "Additional parameters for bdist_wheel" + default: "" + type: string + post-script: + description: "Post script to run prior to build" + default: "" + type: string + smoke-test-script: + description: "Script for Smoke Test for a specific domain" + default: "" + type: string + package-name: + description: "Name of the actual python package that is imported" + default: "" + type: string + trigger-event: + description: "Trigger Event in caller that determines whether or not to upload" + default: "" + type: string + cache-path: + description: "The path(s) on the runner to cache or restore. The path is relative to repository." + default: "" + type: string + cache-key: + description: "The key created when saving a cache and the key used to search for a cache." + default: "" + type: string + submodules: + description: "Works as stated in actions/checkout, but the default value is recursive" + required: false + type: string + default: recursive + +permissions: + id-token: write + contents: read + +jobs: + build: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} + name: ${{ matrix.build_name }} + runs-on: windows.g5.4xlarge.nvidia.gpu + defaults: + run: + shell: bash -l {0} + # If a build is taking longer than 60 minutes on these runners we need + # to have a conversation + timeout-minutes: 60 + steps: + - uses: actions/checkout@v3 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - uses: ./test-infra/.github/actions/setup-ssh + name: Setup SSH + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + activate-with-label: false + instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" + - name: Add Conda scripts to GitHub path + run: | + echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH + - uses: ./test-infra/.github/actions/set-channel + - name: Set PYTORCH_VERSION + if: ${{ env.CHANNEL == 'test' }} + run: | + # When building RC, set the version to be the current candidate version, + # otherwise, leave it alone so nightly will pick up the latest + echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" + - uses: ./test-infra/.github/actions/setup-binary-builds + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + submodules: ${{ inputs.submodules }} + setup-miniconda: false + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ env.ARCH }} + - name: Install torch dependency + run: | + source "${BUILD_ENV_FILE}" + # shellcheck disable=SC2086 + ${CONDA_RUN} ${PIP_INSTALL_TORCH} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + is_windows: 'enabled' + - name: Build clean + working-directory: ${{ inputs.repository }} + run: | + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} python setup.py clean + - name: Build the wheel (bdist_wheel) + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + + if [[ "$CU_VERSION" == "cpu" ]]; then + # CUDA and CPU are ABI compatible on the CPU-only parts, so strip + # in this case + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" + else + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" + fi + + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py bdist_wheel + else + if [[ ! -f ${ENV_SCRIPT} ]]; then + echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" + exit 1 + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} + fi + fi + - name: Run post-script + working-directory: ${{ inputs.repository }} + env: + POST_SCRIPT: ${{ inputs.post-script }} + ENV_SCRIPT: ${{ inputs.env-script }} + if: ${{ inputs.post-script != '' }} + run: | + set -euxo pipefail + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} + - name: Smoke Test + env: + PACKAGE_NAME: ${{ inputs.package-name }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + run: | + source "${BUILD_ENV_FILE}" + WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") + echo "$WHEEL_NAME" + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi + # NB: Only upload to GitHub after passing smoke tests + - name: Upload wheel to GitHub + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ inputs.repository }}/dist/ + - uses: ./test-infra/.github/actions/teardown-windows + if: always() + name: Teardown Windows + + upload: + needs: build + uses: ./.github/workflows/_binary_upload.yml + if: always() + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + test-infra-repository: ${{ inputs.test-infra-repository }} + test-infra-ref: ${{ inputs.test-infra-ref }} + build-matrix: ${{ inputs.build-matrix }} + trigger-event: ${{ inputs.trigger-event }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true From 69bdbab1a6bfaf8ccb8aa775888222e0e222dc26 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 13:18:46 +0800 Subject: [PATCH 08/31] Update build_wheels_windows.yml --- .github/workflows/build_wheels_windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml index 92d94b043c..920de5222d 100644 --- a/.github/workflows/build_wheels_windows.yml +++ b/.github/workflows/build_wheels_windows.yml @@ -205,7 +205,7 @@ jobs: upload: needs: build - uses: ./.github/workflows/_binary_upload.yml + uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main if: always() with: repository: ${{ inputs.repository }} From f2393536b2b57b693fdb1a3ce46b574cc6a993d5 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 21:39:58 +0800 Subject: [PATCH 09/31] Fix ABI incompatibility with pybind11 --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 4a67908a29..d1ec6c2f0b 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ from typing import List import setuptools +import torch import yaml from setuptools import Extension, find_namespace_packages, setup from setuptools.command.build_ext import build_ext @@ -504,6 +505,7 @@ def run(self): ], extra_compile_args=( [ + f'/DPYBIND11_BUILD_ABI=\\"{torch._C._PYBIND11_BUILD_ABI}\\"', "/GS-", "/permissive-", ] From ac8a92b7d8627d3929a9f7fa1c8b001807dab3e1 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 4 May 2024 22:33:45 +0800 Subject: [PATCH 10/31] Fix FileNotFoundError on Windows in test_export_serde --- tests/py/dynamo/models/test_export_serde.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index c13f43987f..9f2a754bbe 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -42,9 +42,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "/tmp/trt.ep", inputs=[input]) + torchtrt.save(trt_module, "./trt.ep", inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("/tmp/trt.ep").module() + # deser_trt_module = torchtrt.load("./trt.ep").module() # Check Pyt and TRT exported program outputs cos_sim = cosine_similarity(model(input), trt_module(input)[0]) assertions.assertTrue( From ed8afa8ffe26191513131d01b37fddba3ed64676 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 8 May 2024 01:00:01 +0800 Subject: [PATCH 11/31] Specify runner for test-infra's build_wheels_windows --- .github/workflows/build-test-windows.yml | 17 +- .github/workflows/build_wheels_windows.yml | 220 --------------------- 2 files changed, 14 insertions(+), 223 deletions(-) delete mode 100644 .github/workflows/build_wheels_windows.yml diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 877a0a82fb..8334911ada 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -24,8 +24,19 @@ jobs: with-rocm: false with-cpu: false - build: + substitute-runner: needs: generate-matrix + outputs: + matrix: ${{ steps.substitute.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - name: Substitute runner + id: substitute + run: | + echo matrix="$(echo '${{ needs.generate-matrix.outputs.matrix }}' | sed -e 's/windows.8xlarge.nvidia.gpu/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + + build: + needs: substitute-runner permissions: id-token: write contents: read @@ -40,13 +51,13 @@ jobs: smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_windows.yml + uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main with: repository: ${{ matrix.repository }} ref: "" test-infra-repository: pytorch/test-infra test-infra-ref: main - build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: ${{ matrix.pre-script }} env-script: ${{ matrix.env-script }} wheel-build-params: ${{ matrix.wheel-build-params }} diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml deleted file mode 100644 index 920de5222d..0000000000 --- a/.github/workflows/build_wheels_windows.yml +++ /dev/null @@ -1,220 +0,0 @@ -name: Build Windows Wheels - -on: - workflow_call: - inputs: - repository: - description: 'Repository to checkout, defaults to ""' - default: "" - type: string - ref: - description: 'Reference to checkout, defaults to "nightly"' - default: "nightly" - type: string - test-infra-repository: - description: "Test infra repository to use" - default: "pytorch/test-infra" - type: string - test-infra-ref: - description: "Test infra reference to use" - default: "" - type: string - build-matrix: - description: "Build matrix to utilize" - default: "" - type: string - pre-script: - description: "Pre script to run prior to build" - default: "" - type: string - env-script: - description: "Script to setup environment variables for the build" - default: "" - type: string - wheel-build-params: - description: "Additional parameters for bdist_wheel" - default: "" - type: string - post-script: - description: "Post script to run prior to build" - default: "" - type: string - smoke-test-script: - description: "Script for Smoke Test for a specific domain" - default: "" - type: string - package-name: - description: "Name of the actual python package that is imported" - default: "" - type: string - trigger-event: - description: "Trigger Event in caller that determines whether or not to upload" - default: "" - type: string - cache-path: - description: "The path(s) on the runner to cache or restore. The path is relative to repository." - default: "" - type: string - cache-key: - description: "The key created when saving a cache and the key used to search for a cache." - default: "" - type: string - submodules: - description: "Works as stated in actions/checkout, but the default value is recursive" - required: false - type: string - default: recursive - -permissions: - id-token: write - contents: read - -jobs: - build: - strategy: - fail-fast: false - matrix: ${{ fromJSON(inputs.build-matrix) }} - env: - PYTHON_VERSION: ${{ matrix.python_version }} - PACKAGE_TYPE: wheel - REPOSITORY: ${{ inputs.repository }} - REF: ${{ inputs.ref }} - CU_VERSION: ${{ matrix.desired_cuda }} - UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} - name: ${{ matrix.build_name }} - runs-on: windows.g5.4xlarge.nvidia.gpu - defaults: - run: - shell: bash -l {0} - # If a build is taking longer than 60 minutes on these runners we need - # to have a conversation - timeout-minutes: 60 - steps: - - uses: actions/checkout@v3 - with: - # Support the use case where we need to checkout someone's fork - repository: ${{ inputs.test-infra-repository }} - ref: ${{ inputs.test-infra-ref }} - path: test-infra - - uses: ./test-infra/.github/actions/setup-ssh - name: Setup SSH - with: - github-secret: ${{ secrets.GITHUB_TOKEN }} - activate-with-label: false - instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" - - name: Add Conda scripts to GitHub path - run: | - echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH - - uses: ./test-infra/.github/actions/set-channel - - name: Set PYTORCH_VERSION - if: ${{ env.CHANNEL == 'test' }} - run: | - # When building RC, set the version to be the current candidate version, - # otherwise, leave it alone so nightly will pick up the latest - echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" - - uses: ./test-infra/.github/actions/setup-binary-builds - with: - repository: ${{ inputs.repository }} - ref: ${{ inputs.ref }} - submodules: ${{ inputs.submodules }} - setup-miniconda: false - python-version: ${{ env.PYTHON_VERSION }} - cuda-version: ${{ env.CU_VERSION }} - arch: ${{ env.ARCH }} - - name: Install torch dependency - run: | - source "${BUILD_ENV_FILE}" - # shellcheck disable=SC2086 - ${CONDA_RUN} ${PIP_INSTALL_TORCH} - - name: Run Pre-Script with Caching - if: ${{ inputs.pre-script != '' }} - uses: ./test-infra/.github/actions/run-script-with-cache - with: - cache-path: ${{ inputs.cache-path }} - cache-key: ${{ inputs.cache-key }} - repository: ${{ inputs.repository }} - script: ${{ inputs.pre-script }} - is_windows: 'enabled' - - name: Build clean - working-directory: ${{ inputs.repository }} - run: | - source "${BUILD_ENV_FILE}" - ${CONDA_RUN} python setup.py clean - - name: Build the wheel (bdist_wheel) - working-directory: ${{ inputs.repository }} - env: - ENV_SCRIPT: ${{ inputs.env-script }} - BUILD_PARAMS: ${{ inputs.wheel-build-params }} - run: | - source "${BUILD_ENV_FILE}" - - if [[ "$CU_VERSION" == "cpu" ]]; then - # CUDA and CPU are ABI compatible on the CPU-only parts, so strip - # in this case - export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" - fi - - if [[ -z "${ENV_SCRIPT}" ]]; then - ${CONDA_RUN} python setup.py bdist_wheel - else - if [[ ! -f ${ENV_SCRIPT} ]]; then - echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" - exit 1 - else - ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} - fi - fi - - name: Run post-script - working-directory: ${{ inputs.repository }} - env: - POST_SCRIPT: ${{ inputs.post-script }} - ENV_SCRIPT: ${{ inputs.env-script }} - if: ${{ inputs.post-script != '' }} - run: | - set -euxo pipefail - source "${BUILD_ENV_FILE}" - ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} - - name: Smoke Test - env: - PACKAGE_NAME: ${{ inputs.package-name }} - SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} - run: | - source "${BUILD_ENV_FILE}" - WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") - echo "$WHEEL_NAME" - ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" - else - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" - ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" - fi - # NB: Only upload to GitHub after passing smoke tests - - name: Upload wheel to GitHub - continue-on-error: true - uses: actions/upload-artifact@v3 - with: - name: ${{ env.ARTIFACT_NAME }} - path: ${{ inputs.repository }}/dist/ - - uses: ./test-infra/.github/actions/teardown-windows - if: always() - name: Teardown Windows - - upload: - needs: build - uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main - if: always() - with: - repository: ${{ inputs.repository }} - ref: ${{ inputs.ref }} - test-infra-repository: ${{ inputs.test-infra-repository }} - test-infra-ref: ${{ inputs.test-infra-ref }} - build-matrix: ${{ inputs.build-matrix }} - trigger-event: ${{ inputs.trigger-event }} - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true From bbb3dab7a36eedcd65929fc059d5bbb543e3fc26 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 8 May 2024 21:21:21 +0800 Subject: [PATCH 12/31] Use env var instead of wheel-build-params for release flag --- .github/workflows/build-test-windows.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 8334911ada..dcd675824d 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -47,7 +47,6 @@ jobs: - repository: pytorch/tensorrt pre-script: packaging/pre_build_script_windows.sh env-script: packaging/vc_env_helper.bat - wheel-build-params: "--release" smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package @@ -60,7 +59,6 @@ jobs: build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: ${{ matrix.pre-script }} env-script: ${{ matrix.env-script }} - wheel-build-params: ${{ matrix.wheel-build-params }} smoke-test-script: ${{ matrix.smoke-test-script }} package-name: ${{ matrix.package-name }} trigger-event: ${{ github.event_name }} From 9729183d2d20a591f1d2275b134e63c43f4564bf Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 18 May 2024 08:23:44 +0800 Subject: [PATCH 13/31] Add -o option to unzip --- packaging/pre_build_script_windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 4a184f9b61..da9bcdbdff 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -18,7 +18,7 @@ else fi curl -Lo TensorRT.zip ${TRT_DOWNLOAD_LINK} -unzip TensorRT.zip -d C:/ +unzip -o TensorRT.zip -d C:/ export CUDA_HOME="$(echo ${CUDA_PATH} | sed -e 's#\\#\/#g')" From 786fbc971fff6d1378db82d86a06e04ace065e42 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 22 May 2024 20:32:02 +0800 Subject: [PATCH 14/31] Use fixed version of transformers in tests --- .github/workflows/build-test-windows.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index dcd675824d..f6f8564dd0 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -86,12 +86,12 @@ jobs: pushd . cd tests/modules # Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now. - ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers timm pybind11==2.6.2 + ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers==4.39.3 timm pybind11==2.6.2 ${CONDA_RUN} python hub.py popd pushd . cd tests/py/ts - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ @@ -170,7 +170,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py popd @@ -249,7 +249,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/core - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . popd From 411a2231b7227ebf7522142315fb136160854ac7 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 22 May 2024 20:45:51 +0800 Subject: [PATCH 15/31] Add test_dyn_models Was added in https://github.com/pytorch/TensorRT/pull/2750 --- .github/workflows/build-test-windows.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index f6f8564dd0..fc7da62c68 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -198,6 +198,7 @@ jobs: cd tests/py/dynamo ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py + ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py popd tests-py-dynamo-core: From 087b5cd0047ffd7b7e220ff5e90c71b4d0b40e84 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 24 May 2024 20:29:07 +0800 Subject: [PATCH 16/31] Use requirements.txt in tests --- .github/workflows/build-test-windows.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index fc7da62c68..8a7de443bb 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -91,7 +91,7 @@ jobs: popd pushd . cd tests/py/ts - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ @@ -170,7 +170,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py popd @@ -250,7 +250,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/core - ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver + ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . popd From d1497e6bc850837b690882e7b7283651f39a95a4 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Thu, 30 May 2024 13:32:42 +0800 Subject: [PATCH 17/31] Keep up with the changes in main --- .github/workflows/build-test-windows.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 8a7de443bb..98cd7d27e7 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -85,13 +85,10 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/modules - # Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now. - ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers==4.39.3 timm pybind11==2.6.2 ${CONDA_RUN} python hub.py popd pushd . cd tests/py/ts - ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ @@ -170,7 +167,6 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py popd @@ -250,7 +246,6 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/core - ${CONDA_RUN} python -m pip install --pre -r ../requirements.txt --use-deprecated=legacy-resolver ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . popd From 339d3e9932716d7e16396922b4dcba2ef6a6b663 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Thu, 30 May 2024 15:47:15 +0800 Subject: [PATCH 18/31] Unify install-torch-tensorrt script for Linux and Windows --- .../scripts/install-torch-tensorrt-windows.sh | 13 ------------- .github/scripts/install-torch-tensorrt.sh | 17 ++++++++++------- .github/workflows/windows-test.yml | 2 +- 3 files changed, 11 insertions(+), 21 deletions(-) delete mode 100644 .github/scripts/install-torch-tensorrt-windows.sh diff --git a/.github/scripts/install-torch-tensorrt-windows.sh b/.github/scripts/install-torch-tensorrt-windows.sh deleted file mode 100644 index 534eb3fcba..0000000000 --- a/.github/scripts/install-torch-tensorrt-windows.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -set -eou pipefail -# Source conda so it's available to the script environment -source ${BUILD_ENV_FILE} -export EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/${CU_VERSION}" - -# Install all the dependencies required for Torch-TensorRT -${CONDA_RUN} pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated=legacy-resolver --extra-index-url=${EXTRA_INDEX_URL} - -# Install Torch-TensorRT -${CONDA_RUN} pip install ${RUNNER_ARTIFACT_DIR}/torch_tensorrt*.whl - -echo -e "Running test script"; \ No newline at end of file diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 428d45f5d9..5f65db56c2 100644 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -1,13 +1,16 @@ -#!/usr/bin/env bash set -eou pipefail -# Source conda so it's available to the script environment -source ${BUILD_ENV_FILE} -export EXTRA_INDEX_URL="https://download.pytorch.org/whl/nightly/${CU_VERSION}" + +EXTRA_INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION} +PLATFORM=$(python -c "import sys; print(sys.platform)") # Install all the dependencies required for Torch-TensorRT -${CONDA_RUN} pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated=legacy-resolver --extra-index-url=${EXTRA_INDEX_URL} +pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated=legacy-resolver --extra-index-url=${EXTRA_INDEX_URL} # Install Torch-TensorRT -${CONDA_RUN} pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl +if [[ ${PLATFORM} == win32 ]]; then + pip install ${RUNNER_ARTIFACT_DIR}/torch_tensorrt*.whl +else + pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl +fi -echo -e "Running test script"; \ No newline at end of file +echo -e "Running test script"; diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 4a5c51c34c..9ca7b70e1b 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -118,7 +118,7 @@ jobs: { echo "${SCRIPT}"; } > "user_script" - cat .github/scripts/install-torch-tensorrt-windows.sh user_script > exec_script + cat .github/scripts/install-torch-tensorrt.sh user_script > exec_script - name: Run script uses: ./test-infra/.github/actions/run-script-with-cache with: From 3fb0b1ea5c15a312974195bdad9d7b22c5673d86 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Thu, 30 May 2024 16:14:54 +0800 Subject: [PATCH 19/31] Simply use TensorRT cuda 12.4 for building Linux workflow also does this way so the cuda version of TensorRT doesn't necessarily have to match up PyTorch's cuda version. --- packaging/pre_build_script_windows.sh | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index da9bcdbdff..c0fd050068 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -1,23 +1,14 @@ set -eou pipefail -python -m pip install -U numpy packaging pyyaml setuptools wheel +pip install -U numpy packaging pyyaml setuptools wheel # Install TRT from PyPI TRT_VERSION=$(python -c "import yaml; print(yaml.safe_load(open('dev_dep_versions.yml', 'r'))['__tensorrt_version__'])") -python -m pip install tensorrt==${TRT_VERSION} tensorrt-${CU_VERSION::4}==${TRT_VERSION} tensorrt-${CU_VERSION::4}-bindings==${TRT_VERSION} tensorrt-${CU_VERSION::4}-libs==${TRT_VERSION} --extra-index-url https://pypi.nvidia.com +pip install tensorrt==${TRT_VERSION} tensorrt-cu12-bindings==${TRT_VERSION} tensorrt-cu12-libs==${TRT_VERSION} --extra-index-url https://pypi.nvidia.com choco install bazelisk -y -if [ ${CU_VERSION} = cu118 ]; then - TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-11.8.zip -elif [ ${CU_VERSION} = cu121 ] || [ ${CU_VERSION} = cu124 ]; then - TRT_DOWNLOAD_LINK=https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip -else - echo "Unsupported CU_VERSION" - exit 1 -fi - -curl -Lo TensorRT.zip ${TRT_DOWNLOAD_LINK} +curl -Lo TensorRT.zip https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip unzip -o TensorRT.zip -d C:/ export CUDA_HOME="$(echo ${CUDA_PATH} | sed -e 's#\\#\/#g')" From 0e4382276bcddf2505aff2875d99339b73d0a215 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 31 May 2024 07:47:07 +0800 Subject: [PATCH 20/31] Add trt-bindings and trt-libs to requirements They don't always get installed automatically on CI and hence lead to "Can not find nvinfer" error. --- tests/py/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index 09a9264f4f..beef52e509 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -9,7 +9,9 @@ torchvision>=0.19.0.dev,<0.20.0 --extra-index-url https://pypi.ngc.nvidia.com pyyaml tensorrt==10.0.1 +tensorrt-cu12-bindings==10.0.1 +tensorrt-cu12-libs==10.0.1 timm>=1.0.3 transformers==4.39.3 parameterized>=0.2.0 -expecttest==0.1.6 \ No newline at end of file +expecttest==0.1.6 From 60500c5b4275c19ad2218c4e43b8174b774b410a Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 31 May 2024 18:50:28 +0800 Subject: [PATCH 21/31] Use pypi.nvidia.com instead of pypi.ngc.nvidia.com for extra-index-url --- tests/py/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index beef52e509..6cf19b8040 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -6,7 +6,7 @@ pytest-xdist>=3.6.1 networkx==2.8.8 torch>=2.4.0.dev,<2.5.0 torchvision>=0.19.0.dev,<0.20.0 ---extra-index-url https://pypi.ngc.nvidia.com +--extra-index-url https://pypi.nvidia.com pyyaml tensorrt==10.0.1 tensorrt-cu12-bindings==10.0.1 From ac07bb1bca3411dcb0ca2fb5a34fcdf36e2a70f1 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 31 May 2024 19:13:42 +0800 Subject: [PATCH 22/31] Use windows.8xlarge.nvidia.gpu.nonephemeral runner in test --- .github/workflows/windows-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 9ca7b70e1b..7b8548ae78 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -54,7 +54,7 @@ jobs: SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 name: ${{ inputs.job-name }}-${{ matrix.desired_cuda }} - runs-on: windows.g5.4xlarge.nvidia.gpu + runs-on: windows.8xlarge.nvidia.gpu.nonephemeral defaults: run: shell: bash -l {0} From c2a80c1dc5b9e58fd5a4ac72333089c62d335176 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 31 May 2024 19:58:02 +0800 Subject: [PATCH 23/31] Sort packages in requirements alphabetically --- tests/py/requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index 6cf19b8040..15d63e47e4 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -1,17 +1,17 @@ # This file is specifically to install correct version of libraries during CI testing. # The index url for torch & torchvision libs is configured in install-torch-tensorrt.sh based on CUDA version # networkx library issue: https://discuss.pytorch.org/t/installing-pytorch-under-python-3-8-question-about-networkx-version/196740 +expecttest==0.1.6 +networkx==2.8.8 +parameterized>=0.2.0 pytest>=8.2.1 pytest-xdist>=3.6.1 -networkx==2.8.8 -torch>=2.4.0.dev,<2.5.0 -torchvision>=0.19.0.dev,<0.20.0 ---extra-index-url https://pypi.nvidia.com pyyaml tensorrt==10.0.1 tensorrt-cu12-bindings==10.0.1 tensorrt-cu12-libs==10.0.1 timm>=1.0.3 +torch>=2.4.0.dev,<2.5.0 +torchvision>=0.19.0.dev,<0.20.0 transformers==4.39.3 -parameterized>=0.2.0 -expecttest==0.1.6 +--extra-index-url https://pypi.nvidia.com From 07c0ac955b0bca76d92f25a2aa36944cd92a82b5 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Fri, 31 May 2024 20:04:14 +0800 Subject: [PATCH 24/31] Don't install NumPy 2.0 rc because it breaks CI --- tests/py/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index 15d63e47e4..f591de28c7 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -3,6 +3,7 @@ # networkx library issue: https://discuss.pytorch.org/t/installing-pytorch-under-python-3-8-question-about-networkx-version/196740 expecttest==0.1.6 networkx==2.8.8 +numpy<2.0.0 parameterized>=0.2.0 pytest>=8.2.1 pytest-xdist>=3.6.1 From d522edf990a62724489440348ad270bca971f404 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 1 Jun 2024 02:28:09 +0800 Subject: [PATCH 25/31] Remove redundant CONDA_RUN in tests --- .github/workflows/build-test-linux.yml | 30 ++++++++++++------------ .github/workflows/build-test-windows.yml | 30 ++++++++++++------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index ac15abb23f..0bd570155e 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -80,13 +80,13 @@ jobs: export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH pushd . cd tests/modules - ${CONDA_RUN} python hub.py + python hub.py popd pushd . cd tests/py/ts - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ popd tests-py-dynamo-converters: @@ -114,7 +114,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/ popd tests-py-dynamo-fe: @@ -142,8 +142,8 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py popd tests-py-dynamo-serde: @@ -171,7 +171,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py popd tests-py-torch-compile-be: @@ -199,9 +199,9 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py popd tests-py-dynamo-core: @@ -229,9 +229,9 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ popd tests-py-core: @@ -259,7 +259,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/core - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . popd concurrency: diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 98cd7d27e7..1bdb52ae8a 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -85,13 +85,13 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/modules - ${CONDA_RUN} python hub.py + python hub.py popd pushd . cd tests/py/ts - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ popd tests-py-dynamo-converters: @@ -116,7 +116,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/ + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/ popd tests-py-dynamo-fe: @@ -141,8 +141,8 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py popd tests-py-dynamo-serde: @@ -167,7 +167,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py popd tests-py-torch-compile-be: @@ -192,9 +192,9 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py - ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py popd tests-py-dynamo-core: @@ -219,9 +219,9 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/dynamo - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ popd tests-py-core: @@ -246,7 +246,7 @@ jobs: export USE_HOST_DEPS=1 pushd . cd tests/py/core - ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . popd concurrency: From aa3112547b8b0c88c83cd8e9fe6a1cdae3997c4e Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 1 Jun 2024 14:43:38 +0800 Subject: [PATCH 26/31] assertEquals is removed in Python 3.12 --- tests/py/dynamo/partitioning/test_dynamic_partitioning.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/py/dynamo/partitioning/test_dynamic_partitioning.py b/tests/py/dynamo/partitioning/test_dynamic_partitioning.py index 9b18c1fc2f..0c9ee14b12 100644 --- a/tests/py/dynamo/partitioning/test_dynamic_partitioning.py +++ b/tests/py/dynamo/partitioning/test_dynamic_partitioning.py @@ -48,12 +48,12 @@ def forward(self, x): elif "_run_on_gpu" in submod[0]: pyt_segments += 1 - self.assertEquals( + self.assertEqual( trt_segments, 1, f"Number of TRT segments should be 1 but got {trt_segments}", ) - self.assertEquals( + self.assertEqual( pyt_segments, 1, f"Number of PyTorch segments should be 1 but got {pyt_segments}", @@ -93,12 +93,12 @@ def forward(self, x): elif "_run_on_gpu" in submod[0]: pyt_segments += 1 - self.assertEquals( + self.assertEqual( trt_segments, 1, f"Number of TRT segments should be 2 but got {trt_segments}", ) - self.assertEquals( + self.assertEqual( pyt_segments, 0, f"Number of PyTorch segments should be 0 but got {pyt_segments}", From 6e876b8fb7c1c44ba7916bc1d150acfe15dc6ddd Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sat, 1 Jun 2024 15:09:19 +0800 Subject: [PATCH 27/31] Fix potential torchvision error due to dependency conflict Installing torchvision with legacy resolver could cause the installed version of torch conflict with the required version from torchvision, leading to errors like `RuntimeError: operator torchvision::nms does not exist` or `AttributeError: partially initialized module 'torchvision' has no attribute 'extension'` --- .github/scripts/install-torch-tensorrt.sh | 5 +++-- tests/py/requirements.txt | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 5f65db56c2..792a140672 100644 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -1,10 +1,11 @@ set -eou pipefail -EXTRA_INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION} +INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION} PLATFORM=$(python -c "import sys; print(sys.platform)") # Install all the dependencies required for Torch-TensorRT -pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated=legacy-resolver --extra-index-url=${EXTRA_INDEX_URL} +pip install --pre "torch>=2.4.0.dev,<2.5.0" "torchvision>=0.19.0.dev,<0.20.0" --index-url ${INDEX_URL} +pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated legacy-resolver # Install Torch-TensorRT if [[ ${PLATFORM} == win32 ]]; then diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index f591de28c7..24b9e18790 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -1,5 +1,4 @@ # This file is specifically to install correct version of libraries during CI testing. -# The index url for torch & torchvision libs is configured in install-torch-tensorrt.sh based on CUDA version # networkx library issue: https://discuss.pytorch.org/t/installing-pytorch-under-python-3-8-question-about-networkx-version/196740 expecttest==0.1.6 networkx==2.8.8 @@ -12,7 +11,5 @@ tensorrt==10.0.1 tensorrt-cu12-bindings==10.0.1 tensorrt-cu12-libs==10.0.1 timm>=1.0.3 -torch>=2.4.0.dev,<2.5.0 -torchvision>=0.19.0.dev,<0.20.0 transformers==4.39.3 --extra-index-url https://pypi.nvidia.com From 19e2eed65a6d99c179c0eddad378c67bb0214df0 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Tue, 4 Jun 2024 22:49:03 +0800 Subject: [PATCH 28/31] Address review comment --- .github/scripts/install-torch-tensorrt.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 792a140672..9a6b2a8b8b 100644 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -1,10 +1,11 @@ set -eou pipefail +TORCH_TORCHVISION=$(grep "^torch" ${PWD}/py/requirements.txt) INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION} PLATFORM=$(python -c "import sys; print(sys.platform)") # Install all the dependencies required for Torch-TensorRT -pip install --pre "torch>=2.4.0.dev,<2.5.0" "torchvision>=0.19.0.dev,<0.20.0" --index-url ${INDEX_URL} +pip install --pre ${TORCH_TORCHVISION} --index-url ${INDEX_URL} pip install --pre -r ${PWD}/tests/py/requirements.txt --use-deprecated legacy-resolver # Install Torch-TensorRT From 6ceda3557a6c72b10b06d2762f4f12cfb0418f52 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 12 Jun 2024 21:02:34 +0800 Subject: [PATCH 29/31] Use tempdir in test_export_serde --- tests/py/dynamo/models/test_export_serde.py | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index 9f2a754bbe..58e0115886 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -1,3 +1,5 @@ +import os +import tempfile import unittest import pytest @@ -8,6 +10,8 @@ assertions = unittest.TestCase() +trt_ep_path = os.path.join(tempfile.gettempdir(), "trt.ep") + @pytest.mark.unit def test_base_full_compile(ir): @@ -42,9 +46,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() # Check Pyt and TRT exported program outputs cos_sim = cosine_similarity(model(input), trt_module(input)[0]) assertions.assertTrue( @@ -94,9 +98,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() # Check Pyt and TRT exported program outputs outputs_pyt = model(input) outputs_trt = trt_module(input) @@ -151,9 +155,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() # Check Pyt and TRT exported program outputs outputs_pyt = model(input) outputs_trt = trt_module(input) @@ -211,9 +215,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() outputs_pyt = model(input) outputs_trt = trt_module(input) for idx in range(len(outputs_pyt)): @@ -253,9 +257,9 @@ def test_resnet18(ir): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() outputs_pyt = model(input) outputs_trt = trt_module(input) cos_sim = cosine_similarity(outputs_pyt, outputs_trt[0]) @@ -309,9 +313,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, "./trt.ep", inputs=[input]) + torchtrt.save(trt_module, trt_ep_path, inputs=[input]) # TODO: Enable this serialization issues are fixed - # deser_trt_module = torchtrt.load("./trt.ep").module() + # deser_trt_module = torchtrt.load(trt_ep_path).module() outputs_pyt = model(input) outputs_trt = trt_module(input) From 11d029388231352f0985809aad4ac4a8c1041153 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 12 Jun 2024 21:14:35 +0800 Subject: [PATCH 30/31] Add back CUDA version specialization for bindings and libs --- packaging/pre_build_script_windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index c0fd050068..9724e18327 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -4,7 +4,7 @@ pip install -U numpy packaging pyyaml setuptools wheel # Install TRT from PyPI TRT_VERSION=$(python -c "import yaml; print(yaml.safe_load(open('dev_dep_versions.yml', 'r'))['__tensorrt_version__'])") -pip install tensorrt==${TRT_VERSION} tensorrt-cu12-bindings==${TRT_VERSION} tensorrt-cu12-libs==${TRT_VERSION} --extra-index-url https://pypi.nvidia.com +pip install tensorrt==${TRT_VERSION} tensorrt-${CU_VERSION::4}-bindings==${TRT_VERSION} tensorrt-${CU_VERSION::4}-libs==${TRT_VERSION} --extra-index-url https://pypi.nvidia.com choco install bazelisk -y From 403ebc95d8c600367f9c259c6efabe110ff96587 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Thu, 13 Jun 2024 08:10:31 +0800 Subject: [PATCH 31/31] Move trt_bindings and trt_libs to pyproject.toml --- pyproject.toml | 2 ++ tests/py/requirements.txt | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4313383431..e8e16d5f3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,8 @@ keywords = ["pytorch", "torch", "tensorrt", "trt", "ai", "artificial intelligenc dependencies = [ "torch >=2.4.0.dev,<2.5.0", "tensorrt==10.0.1", + "tensorrt-cu12_bindings==10.0.1", + "tensorrt-cu12_libs==10.0.1", "packaging>=23", "numpy", "typing-extensions>=4.7.0", diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt index 24b9e18790..bdae578713 100644 --- a/tests/py/requirements.txt +++ b/tests/py/requirements.txt @@ -8,8 +8,6 @@ pytest>=8.2.1 pytest-xdist>=3.6.1 pyyaml tensorrt==10.0.1 -tensorrt-cu12-bindings==10.0.1 -tensorrt-cu12-libs==10.0.1 timm>=1.0.3 transformers==4.39.3 --extra-index-url https://pypi.nvidia.com