feat: Upgrade Docker build to use custom TRT + CUDNN (#1805)

gs-olive · bowang007 · commit d708fa5cc7f2 · 2023-04-28T16:22:56.000-07:00
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,39 +1,54 @@
 # Base image starts with CUDA
-ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu20.04
+ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu22.04
 FROM ${BASE_IMG} as base
 
+ARG TENSORRT_VERSION
+RUN test -n "$TENSORRT_VERSION" || (echo "No tensorrt version specified, please use --build-arg TENSORRT_VERSION=x.y.z to specify a version." && exit 1)
+ARG CUDNN_VERSION
+RUN test -n "$CUDNN_VERSION" || (echo "No cudnn version specified, please use --build-arg CUDNN_VERSION=x.y.z to specify a version." && exit 1)
+
+ARG PYTHON_VERSION=3.10
+ENV PYTHON_VERSION=${PYTHON_VERSION}
+
 ARG USE_CXX11_ABI
 ENV USE_CXX11=${USE_CXX11_ABI}
+ENV DEBIAN_FRONTEND=noninteractive
 
 # Install basic dependencies
 RUN apt-get update
-RUN DEBIAN_FRONTEND=noninteractive apt install -y build-essential manpages-dev wget zlib1g software-properties-common git
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt install -y python3.8 python3.8-distutils python3.8-dev
-RUN wget https://bootstrap.pypa.io/get-pip.py
-RUN ln -s /usr/bin/python3.8 /usr/bin/python
-RUN python get-pip.py
-RUN pip3 install wheel
-
-# Install CUDNN + TensorRT
-RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
-RUN mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
+RUN apt install -y build-essential manpages-dev wget zlib1g software-properties-common git libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8
+
+# Install PyEnv and desired Python version
+ENV HOME="/root"
+ENV PYENV_DIR="$HOME/.pyenv"
+ENV PATH="$PYENV_DIR/shims:$PYENV_DIR/bin:$PATH"
+RUN wget -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer &&\
+    chmod 755 pyenv-installer &&\
+    bash pyenv-installer &&\
+    eval "$(pyenv init -)"
+
+RUN pyenv install -v ${PYTHON_VERSION}
+RUN pyenv global ${PYTHON_VERSION}
+
+# Install CUDNN + TensorRT + dependencies
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
+RUN mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/7fa2af80.pub
 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35
 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
-RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
 RUN apt-get update
-RUN apt-get install -y libcudnn8=8.5.0* libcudnn8-dev=8.5.0*
+RUN apt-get install -y libcudnn8=${CUDNN_VERSION}* libcudnn8-dev=${CUDNN_VERSION}*
 
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
-RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
 RUN apt-get update
 
-RUN apt-get install -y libnvinfer8=8.5.1* libnvinfer-plugin8=8.5.1* libnvinfer-dev=8.5.1* libnvinfer-plugin-dev=8.5.1* libnvonnxparsers8=8.5.1-1* libnvonnxparsers-dev=8.5.1-1* libnvparsers8=8.5.1-1*  libnvparsers-dev=8.5.1-1*
+RUN apt-get install -y libnvinfer8=${TENSORRT_VERSION}* libnvinfer-plugin8=${TENSORRT_VERSION}* libnvinfer-dev=${TENSORRT_VERSION}* libnvinfer-plugin-dev=${TENSORRT_VERSION}* libnvonnxparsers8=${TENSORRT_VERSION}-1* libnvonnxparsers-dev=${TENSORRT_VERSION}-1* libnvparsers8=${TENSORRT_VERSION}-1*  libnvparsers-dev=${TENSORRT_VERSION}-1*
 
-# Setup Bazel
-RUN wget -q https://github.com/bazelbuild/bazelisk/releases/download/v1.16.0/bazelisk-linux-amd64 -O /usr/bin/bazel \
- && chmod a+x /usr/bin/bazel
+# Setup Bazel via Bazelisk
+RUN wget -q https://github.com/bazelbuild/bazelisk/releases/download/v1.16.0/bazelisk-linux-amd64 -O /usr/bin/bazel &&\
+    chmod a+x /usr/bin/bazel
 
 # Build Torch-TensorRT in an auxillary container
 FROM base as torch-tensorrt-builder-base
@@ -42,18 +57,24 @@ ARG ARCH="x86_64"
 ARG TARGETARCH="amd64"
 
 RUN apt-get install -y python3-setuptools
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
 
-RUN apt-get update && apt-get install -y --no-install-recommends locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
+RUN apt-get update &&\
+    apt-get install -y --no-install-recommends locales ninja-build &&\
+    rm -rf /var/lib/apt/lists/* &&\
+    locale-gen en_US.UTF-8
 
 FROM torch-tensorrt-builder-base as torch-tensorrt-builder
 
 COPY . /workspace/torch_tensorrt/src
 WORKDIR /workspace/torch_tensorrt/src
 RUN cp ./docker/WORKSPACE.docker WORKSPACE
 
+# Symlink pyenv's site-packages directory for the selected Python to /opt/python3, giving packages a fixed path to be sourced from
+RUN ln -s "`pyenv which python | xargs dirname | xargs dirname`/lib/python$PYTHON_VERSION/site-packages" "/opt/python3"
+
 # This script builds both libtorchtrt bin/lib/include tarball and the Python wheel, in dist/
-RUN ./docker/dist-build.sh
+RUN bash ./docker/dist-build.sh
 
 # Copy and install Torch-TRT into the main container
 FROM base as torch-tensorrt
@@ -63,10 +84,12 @@ COPY --from=torch-tensorrt-builder  /workspace/torch_tensorrt/src/py/dist/ .
 
 RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
 RUN pip install -r /opt/torch_tensorrt/py/requirements.txt
-RUN pip3 install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
+RUN pip install tensorrt==${TENSORRT_VERSION}.*
+RUN pip install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
 
 WORKDIR /opt/torch_tensorrt
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
-ENV PATH /usr/local/lib/python3.8/dist-packages/torch_tensorrt/bin:${PATH}
+
+ENV LD_LIBRARY_PATH /opt/python3/site-packages/torch/lib:/opt/python3/site-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
+ENV PATH /opt/python3/site-packages/torch_tensorrt/bin:${PATH}
 
 CMD /bin/bash
diff --git a/docker/README.md b/docker/README.md
@@ -2,7 +2,10 @@
 
 * Use `Dockerfile` to build a container which provides the exact development environment that our master branch is usually tested against.
 
-* `Dockerfile` currently uses the exact library versions (Torch, CUDA, CUDNN, TensorRT) listed in <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> to build Torch-TensorRT.
+* The `Dockerfile` currently uses <a href="https://github.com/bazelbuild/bazelisk">Bazelisk</a> to select the Bazel version, and uses the exact library versions of Torch and CUDA listed in <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a>.
+  * The desired versions of CUDNN and TensorRT must be specified as build-args, with major, minor, and patch versions as in: `--build-arg TENSORRT_VERSION=a.b.c --build-arg CUDNN_VERSION=x.y.z`
+  * [**Optional**] The desired base image can be changed by explicitly setting a base image, as in `--build-arg BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu22.04`, though this is optional.
+  * [**Optional**] Additionally, the desired Python version can be changed by explicitly setting a version, as in `--build-arg PYTHON_VERSION=3.10`, though this is optional as well.
 
 * This `Dockerfile` installs `pre-cxx11-abi` versions of Pytorch and builds Torch-TRT using `pre-cxx11-abi` libtorch as well.
 
@@ -14,11 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch
 
 ### Instructions
 
+- The example below uses CUDNN 8.5.0 and TensorRT 8.5.1
+- See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.
+
 > From root of Torch-TensorRT repo
 
 Build:
 ```
-DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t torch_tensorrt:latest .
+DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.5.1 --build-arg CUDNN_VERSION=8.5.0 -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
 
 Run:
diff --git a/docker/WORKSPACE.docker b/docker/WORKSPACE.docker
@@ -50,13 +50,13 @@ new_local_repository(
 
 new_local_repository(
     name = "libtorch",
-    path = "/usr/local/lib/python3.8/dist-packages/torch/",
+    path = "/opt/python3/site-packages/torch/",
     build_file = "third_party/libtorch/BUILD"
 )
 
 new_local_repository(
     name = "libtorch_pre_cxx11_abi",
-    path = "/usr/local/lib/python3.8/dist-packages/torch/",
+    path = "/opt/python3/site-packages/torch/",
     build_file = "third_party/libtorch/BUILD"
 )
 
diff --git a/docker/WORKSPACE.ngc b/docker/WORKSPACE.ngc
@@ -33,7 +33,7 @@ git_repository(
 # This is currently used in pytorch NGC container CI testing.
 local_repository(
     name = "torch_tensorrt",
-    path = "/usr/local/lib/python3.8/dist-packages/torch_tensorrt"
+    path = "/opt/python3/site-packages/torch_tensorrt/"
 )
 
 # CUDA should be installed on the system locally
@@ -55,13 +55,13 @@ new_local_repository(
 
 new_local_repository(
     name = "libtorch",
-    path = "/usr/local/lib/python3.8/dist-packages/torch",
+    path = "/opt/python3/site-packages/torch/",
     build_file = "third_party/libtorch/BUILD"
 )
 
 new_local_repository(
     name = "libtorch_pre_cxx11_abi",
-    path = "/usr/local/lib/python3.8/dist-packages/torch",
+    path = "/opt/python3/site-packages/torch/",
     build_file = "third_party/libtorch/BUILD"
 )
 
diff --git a/docker/dist-build.sh b/docker/dist-build.sh
@@ -10,9 +10,13 @@ fi
 
 cd ${TOP_DIR} \
     && mkdir -p dist && cd py \
-    && pip install -r requirements.txt \
-    && MAX_JOBS=1 LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 \
-        ${BUILD_CMD} $* || exit 1
+    && pip install -r requirements.txt
+
+# Symlink pyenv's site-packages directory for the selected Python to /opt/python3, giving packages a fixed path to be sourced from
+ln -s "`pyenv which python | xargs dirname | xargs dirname`/lib/python$PYTHON_VERSION/site-packages" "/opt/python3"
+
+# Build Torch-TRT
+MAX_JOBS=1 LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 ${BUILD_CMD} $* || exit 1
 
 pip3 install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org
 jupyter nbextension enable --py widgetsnbextension

Original file line number	Diff line number	Diff line change
`@@ -50,13 +50,13 @@ new_local_repository(`
`50`	`50`
`51`	`51`	`new_local_repository(`
`52`	`52`	`name = "libtorch",`
`53`		`- path = "/usr/local/lib/python3.8/dist-packages/torch/",`
	`53`	`+ path = "/opt/python3/site-packages/torch/",`
`54`	`54`	`build_file = "third_party/libtorch/BUILD"`
`55`	`55`	`)`
`56`	`56`
`57`	`57`	`new_local_repository(`
`58`	`58`	`name = "libtorch_pre_cxx11_abi",`
`59`		`- path = "/usr/local/lib/python3.8/dist-packages/torch/",`
	`59`	`+ path = "/opt/python3/site-packages/torch/",`
`60`	`60`	`build_file = "third_party/libtorch/BUILD"`
`61`	`61`	`)`
`62`	`62`