
Commit e6a1193

feat: Add example usage scripts for dynamo path
- Add sample scripts covering resnet18, transformers, and custom examples showcasing the `torch_tensorrt.dynamo.torch_compile` path, which can compile models with data-dependent control flow and other constructs that make other compilation methods more difficult
- Cover the different customizable features allowed in the new backend
- Make the scripts interactive Jupyter notebooks
1 parent a77017c commit e6a1193
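
As a rough, non-authoritative illustration of the data-dependent control flow point in the commit message, the sketch below (not part of this commit) defines a hypothetical toy model whose forward pass branches on a tensor value and compiles it through the `"tensorrt"` backend used in the notebooks below; `ConditionalModel` and its inputs are invented for illustration only.

```python
import torch
import torch_tensorrt  # assumed to register the "tensorrt" backend used in the notebooks below

# Hypothetical toy model: the branch depends on runtime tensor values,
# which breaks whole-graph tracing but is handled by the torch.compile path,
# with non-convertible segments falling back to eager Torch execution.
class ConditionalModel(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if x.sum() > 0:  # data-dependent control flow
            return torch.relu(x)
        return torch.tanh(x)

model = ConditionalModel().eval().cuda()
optimized = torch.compile(model, backend="tensorrt")
optimized(torch.randn((4, 4)).cuda())  # first call triggers compilation
```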

File tree

3 files changed: +475 −0 lines changed

Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,152 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ff5a530b",
   "metadata": {},
   "source": [
    "# Overview\n",
    "This interactive notebook is intended as an overview of the process by which `torch_tensorrt.dynamo.torch_compile` works, and how it integrates with the new `torch.compile` API."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2dae5728",
   "metadata": {},
   "source": [
    "## Imports and Model Definition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fd29ec8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch_tensorrt.dynamo.torch_compile import create_backend\n",
    "from torch_tensorrt.fx.lower_setting import LowerPrecision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eafb701f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We begin by defining a model\n",
    "class Model(torch.nn.Module):\n",
    "    def __init__(self) -> None:\n",
    "        super().__init__()\n",
    "        self.relu = torch.nn.ReLU()\n",
    "\n",
    "    def forward(self, x: torch.Tensor, y: torch.Tensor):\n",
    "        x_out = self.relu(x)\n",
    "        y_out = self.relu(y)\n",
    "        x_y_out = x_out + y_out\n",
    "        return torch.mean(x_y_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7debfc0",
   "metadata": {},
   "source": [
    "## Compilation with `torch.compile` Using Default Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ad82f23",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define sample float inputs and initialize model\n",
    "sample_inputs = [torch.rand((5, 7)).cuda(), torch.rand((5, 7)).cuda()]\n",
    "model = Model().eval().cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33a4d0e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Next, we compile the model using torch.compile\n",
    "# For the default settings, we can simply call torch.compile\n",
    "# with the backend \"tensorrt\", and run the model on an\n",
    "# input to trigger compilation, like so:\n",
    "optimized_model = torch.compile(model, backend=\"tensorrt\")\n",
    "optimized_model(*sample_inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ff31119d",
   "metadata": {},
   "source": [
    "## Compilation with `torch.compile` Using Custom Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be6692d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define sample half inputs and initialize model\n",
    "sample_inputs_half = [torch.rand((5, 7)).half().cuda(), torch.rand((5, 7)).half().cuda()]\n",
    "model_half = Model().eval().cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67240828",
   "metadata": {},
   "outputs": [],
   "source": [
    "# If we want to customize certain options in the backend,\n",
    "# but still use the torch.compile call directly, we can call the\n",
    "# convenience/helper function create_backend to create a custom backend\n",
    "# which has been pre-populated with certain keys\n",
    "custom_backend = create_backend(\n",
    "    lower_precision=LowerPrecision.FP16,\n",
    "    debug=True,\n",
    "    min_block_size=2,\n",
    "    torch_executed_ops={},\n",
    ")\n",
    "\n",
    "# Run the model on an input to trigger compilation, like so:\n",
    "optimized_model_custom = torch.compile(model_half, backend=custom_backend)\n",
    "optimized_model_custom(*sample_inputs_half)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
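
One step the overview notebook above stops short of is comparing the compiled outputs to eager execution. A minimal sanity-check sketch, not part of the commit and assuming the `model`, `optimized_model`, and `sample_inputs` objects defined in the cells above, could look like:

```python
import torch

# Compare eager and compiled outputs on the same FP32 inputs;
# a loose tolerance allows for TensorRT kernel/precision differences.
with torch.no_grad():
    eager_out = model(*sample_inputs)
    compiled_out = optimized_model(*sample_inputs)

assert torch.allclose(eager_out, compiled_out, atol=1e-3, rtol=1e-3)
print("Eager and compiled outputs match within tolerance")
```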
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c0b24bfc",
   "metadata": {},
   "source": [
    "# Overview\n",
    "This script is intended as a sample of the `torch_tensorrt.dynamo.torch_compile` workflow on a ResNet model."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04ecf2a9",
   "metadata": {},
   "source": [
    "## Imports and Model Definition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fc05cb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch_tensorrt.dynamo import torch_compile\n",
    "import torchvision.models as models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb2b9221",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize model with half precision and sample inputs\n",
    "model = models.resnet18(pretrained=True).half().eval().to(\"cuda\")\n",
    "inputs = [torch.randn((1, 3, 224, 224)).to(\"cuda\").half()]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee3ab312",
   "metadata": {},
   "source": [
    "## Optional Input Arguments to `torch_tensorrt.dynamo.torch_compile`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6864197b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enabled precisions for TensorRT optimization\n",
    "enabled_precisions = {torch.half}\n",
    "\n",
    "# Whether to print verbose logs\n",
    "debug = True\n",
    "\n",
    "# Workspace size for TensorRT\n",
    "workspace_size = 20 << 30\n",
    "\n",
    "# Minimum number of operators per TRT-Engine block\n",
    "# (Lower value allows more graph segmentation)\n",
    "min_block_size = 3\n",
    "\n",
    "# Operations to run in Torch, regardless of converter support\n",
    "torch_executed_ops = {}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7648ba15",
   "metadata": {},
   "source": [
    "## Compilation with `torch_tensorrt.dynamo.torch_compile`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1be9d0b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build and compile the model with torch.compile, using the tensorrt backend\n",
    "optimized_model = torch_compile(\n",
    "    model,\n",
    "    inputs,\n",
    "    enabled_precisions=enabled_precisions,\n",
    "    debug=debug,\n",
    "    workspace_size=workspace_size,\n",
    "    min_block_size=min_block_size,\n",
    "    torch_executed_ops=torch_executed_ops,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9c42544f",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6acf9768",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Does not cause recompilation (same batch size as compilation input)\n",
    "new_inputs = [torch.randn((1, 3, 224, 224)).half().to(\"cuda\")]\n",
    "new_outputs = optimized_model(*new_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83185cf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Does cause recompilation (new batch size)\n",
    "new_batch_size_inputs = [torch.randn((8, 3, 224, 224)).half().to(\"cuda\")]\n",
    "new_batch_size_outputs = optimized_model(*new_batch_size_inputs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
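
Since the ResNet notebook's goal is runtime optimization, a rough latency comparison is a natural follow-up. The sketch below is not part of the commit: `benchmark` is a hypothetical helper, and `model`, `optimized_model`, and `new_inputs` are assumed from the cells above.

```python
import time
import torch

def benchmark(fn, args, iters=100):
    # Warm up, then time with explicit CUDA synchronization
    with torch.no_grad():
        for _ in range(10):
            fn(*args)
        torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            fn(*args)
        torch.cuda.synchronize()
    return (time.perf_counter() - start) / iters

print(f"Eager ResNet18:     {benchmark(model, new_inputs) * 1e3:.2f} ms/iter")
print(f"Optimized ResNet18: {benchmark(optimized_model, new_inputs) * 1e3:.2f} ms/iter")
```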
