257 commits
1753202
[TRTLLM-5825][fix] Fix torch LoRA TP (#5338)
amitz-nv Jun 19, 2025
21ce9b6
test: add qwen3 cases (#5302)
ruodil Jun 19, 2025
e22e884
test: amend test case name in perf cluster test (#5356)
ruodil Jun 19, 2025
b558232
Refactor CutlassFusedMoE (#5344)
hlu1 Jun 19, 2025
493f268
[Infra]Fix l0_sanity_check.yml which also has gb202 and gb203 (#5360)
EmmaQiaoCh Jun 19, 2025
bca758f
fix: Fix DS-R1 nvfp4 test case naming (#5361)
syuoni Jun 19, 2025
68687a9
[WAR][nvbug/5321947] Add an async sleep to unblock event loop. (#5342)
FrankD412 Jun 19, 2025
9a53e58
blog: Disaggregated Serving in TensorRT-LLM (#5353)
Shixiaowei02 Jun 19, 2025
c7af650
Fix: fix the deterministic issue in the MTP Eagle path (#5285)
lfr-0531 Jun 19, 2025
1e35be5
doc: subsequent modifications of blog 5 (#5366)
Shixiaowei02 Jun 19, 2025
7246fd7
feat: Support stream_interval (#5284)
kaiyux Jun 19, 2025
113f6fb
Fix: missing clientId when serialize and deserialize response (#5231)
kaiyux Jun 19, 2025
9bd42ec
[TRTLLM-5208][BREAKING CHANGE] chore: make pytorch LLM the default (#…
Superjomn Jun 19, 2025
b1878ea
Add Wechat_Group_QR_Code.png to docs/source/media and main page of TR…
AdamzNV Jun 19, 2025
5d4ab47
fix: refactor and fix mtp vanilla (#4762)
lfr-0531 Jun 19, 2025
4f0f17a
feat: Misc Opt for large scale EP (#5374)
dongxuy04 Jun 20, 2025
b3045c4
refactor: remove TrtGptModelOptionalParams (#5165)
Funatiq Jun 20, 2025
ebadc13
[doc] update mtp documents (#5387)
lfr-0531 Jun 21, 2025
58a8a8f
feature: unify new_tokens format sample state to trtllm sampler new_t…
netanel-haber Jun 23, 2025
e16c1be
[fix] Add 1 and draft_token_num to seq_len when overlap scheduling is…
HuiGao-NV Jun 24, 2025
4b32a3f
test: [CI] remove closed bugs (#5400)
xinhe-nv Jun 24, 2025
e2a8cbc
refactor: manage cache indirection in decoder state (#5315)
Funatiq Jun 24, 2025
658fb5b
tests: update benchmark test lists (#5365)
xinhe-nv Jun 24, 2025
d26040e
chore: delete mamba hybrid, since it is now called NemotronH (#5409)
vegaluisjose Jun 24, 2025
4752720
[Infra] - Waive failed tests in post-merge and increase some timeout …
EmmaQiaoCh Jun 24, 2025
35a92f6
Add debug hook to support dump tensor data and add new debug function…
HuiGao-NV Jun 24, 2025
d93a5e0
Chore: remove unused variables (#5314)
QiJune Jun 24, 2025
846bbf1
Fix test Pytorch model engine (#5416)
Tabrizian Jun 24, 2025
6995200
Add MTP support for Online EPLB (#5213)
dongxuy04 Jun 24, 2025
241f921
waive test_moe.py::test_moe_fp8[autotune] (#5455)
QiJune Jun 25, 2025
73ba4fc
fix: fix bug of qwen3 + eagle3 + finalize_moe_fusion (#5369)
byshiue Jun 25, 2025
5cffb7e
[AutoDeploy] Merge feat/ad_2025_06_13 feature branch (#5454)
lucaslie Jun 25, 2025
d535489
feat: Dynamically remove servers in PD (#5270)
Shunkangz Jun 25, 2025
da98e03
tests: Set kv cache free memory fraction in test case (#5433)
HuiGao-NV Jun 25, 2025
76da7fe
fix (NvBug 5354925): Fix static EPLB (#5411)
syuoni Jun 25, 2025
fc7a81c
test: Add LLGuidance test and refine guided decoding (#5348)
syuoni Jun 25, 2025
478f668
CI: update multi gpu test triggering file list (#5466)
QiJune Jun 25, 2025
3ca2f6a
start OAIServer with `max_beam_width=1` for TorchSampler (#5427)
netanel-haber Jun 25, 2025
f3cfe86
chore: bump version to 1.0.0rc1 (#5460)
yiqingy0 Jun 25, 2025
1f292ff
[https://jirasw.nvidia.com/browse/TRTLLM-4645] support mutliCtasKvMod…
PerkzZheng Jun 25, 2025
2901c5a
CI: waive test_ad_build_small_multi (#5471)
QiJune Jun 25, 2025
b3a4c1f
feat: Remove not used padding_idx in models (#5385)
HuiGao-NV Jun 25, 2025
d6ada5f
[nvbug/5354956] fix: unexpected keyword argument 'streaming' (#5436)
kaiyux Jun 25, 2025
cc3c2b3
Move 3 disaggregated cases from 4 GPUs devices to 1 GPU device (#5457)
HuiGao-NV Jun 25, 2025
314f15f
Fix: fix nvbug 5356427 (#5464)
HuiGao-NV Jun 25, 2025
c5ae327
feat: Make benchmark_serving part of the library (#5428)
kaiyux Jun 25, 2025
205c97a
[TRTLLM-5974][feat] Support disaggregated serving in TRTLLM Sampler (…
dcampora Jun 25, 2025
5bc8c89
[chore] Disable block reuse when draft model speculation is being use…
mikeiovine Jun 25, 2025
3a2c4ca
chore: split _build_model method for TorchLlm and TrtLlm (#5418)
QiJune Jun 25, 2025
61bb71f
[fix][test] remove test in global scope (#5470)
omera-nv Jun 25, 2025
bdc8dfe
[fix][ci] dont build wheel for cpp tests (#5443)
omera-nv Jun 25, 2025
feaf789
CI: reduce BF16 test cases in B200 (#5482)
QiJune Jun 25, 2025
1e4fa13
Add sleep function for disagg gen-only benchmarking (#5398)
qiaoxj07 Jun 25, 2025
74ae15a
CI: enable test cases on single device type (#5484)
HuiGao-NV Jun 26, 2025
3fc5754
[5356427] fix: Remove the seq_len of 4096 from FP8 block scale MoE tu…
hyukn Jun 26, 2025
578dbc8
feat: chunked prefill for MLA (Blackwell) (#4651)
jmydurant Jun 26, 2025
d135f59
Add unit test for routing kernels (#5405)
ChristinaZ Jun 26, 2025
d9b75f8
[CI] Waive `test_fp8_block_scales_4gpus[ep4-mtp_nextn=0-fp8kv=True-at…
venkywonka Jun 26, 2025
32d1573
[Infra] - Add timeout setting for long tests found in post-merge (#5501)
EmmaQiaoCh Jun 26, 2025
6aef149
Revert "feature: unify new_tokens format sample state to trtllm sampe…
netanel-haber Jun 26, 2025
e9cd810
keep sm90 headsize 128 cubins (#5320)
qsang-nv Jun 26, 2025
9428414
opensource: Opensource MOE MXFP8-MXFP4 implementation (#5222)
djns99 Jun 26, 2025
9ee3360
[TRTLLM-6019] feat: Remove cutlass min latency code from AutoTuner. (…
hyukn Jun 26, 2025
e0bb123
[TRTLLM-5921][feat] Prevent serialization of entire LoRA adapters in …
amitz-nv Jun 26, 2025
490d2e5
feat: large-scale EP(part 8: Online EP load balancer integration for …
dongxuy04 Jun 26, 2025
7e681fb
[chore] Allow configuring linking of NVRTC wrapper (#5189)
AlessioNetti Jun 26, 2025
1bab900
perf: Optimize swizzle_sf, unswizzle_sf, reswizzle_sf (#5318)
bobboli Jun 26, 2025
fa0ea92
[fix][ci] trigger multigpu tests for deepseek changes (#5423)
omera-nv Jun 26, 2025
ff2dd72
tests: waive tests (#5458)
xinhe-nv Jun 26, 2025
749393e
doc: Fix benchmark cmd in disagg scripts (#5515)
kaiyux Jun 26, 2025
0788c5d
[perf] improve XQA-MLA perf (#5468)
lowsfer Jun 26, 2025
2eb6502
feat: Add support for TRTLLM CustomDataset (#5511)
kaiyux Jun 26, 2025
3a1f4d4
[feat] Add progress bar to benchmark (#5173)
arekay-nv Jun 26, 2025
baf7eaa
Add trtllm-bench reviewers. (#5452)
FrankD412 Jun 26, 2025
1633bd2
[CI] move flashinfer llama tests to post merge (#5506)
omera-nv Jun 26, 2025
6bae76d
[fix][ci] move torch tests to run under torch stage (#5473)
omera-nv Jun 26, 2025
8dfa31c
refactor: remove batch_manager::KvCacheConfig and use executor::KvCac…
Funatiq Jun 26, 2025
8836990
[TRTLLM-3602][feat] support nvfp4 model and fp8 kv cache for MLA chun…
jmydurant Jun 26, 2025
de7cd0d
fix: MoE autotune fallback failed to query default heuristic (#5520)
rosenrodt Jun 26, 2025
69c4ef2
Update allow list 2025_06_26 (#5526)
yuanjingx87 Jun 26, 2025
0083228
fix: Mapping rank boundary check bug (#4935)
venkywonka Jun 26, 2025
aa6e015
Update trtllm-bench to support new Pytorch default. (#5491)
FrankD412 Jun 27, 2025
0f3bd78
[TRTLLM-4971]: Use safe deserialization in ParallelConfig (#4630)
yibinl-nvidia Jun 27, 2025
a3494be
tests: waive failed tests on main (#5512)
xinhe-nv Jun 27, 2025
dc36228
fix: Fix block scale fp8 support for deepseek v3 on Blackwell. (#5514)
yuxianq Jun 27, 2025
49af791
Add testing for trtllm-llmapi-launch with tritonserver (#5528)
Tabrizian Jun 27, 2025
ef43b95
Fix execute_process: check results using EQUAL (#5481)
yuantailing Jun 27, 2025
83a1f60
feat: Expose bias and FP8_MXFP4 MOE CUTLASS backend features to pytor…
djns99 Jun 27, 2025
980030c
[Infra] - Waive failed case in post-merge (#5536)
EmmaQiaoCh Jun 27, 2025
73b8a95
feat: Use inference mode in update_requests to improve perf of TRTLLM…
dcampora Jun 27, 2025
7f1893f
ci: waive flaky test test_llama_eagle3 (#5548)
syuoni Jun 27, 2025
a608b00
Fix mPtrExpertCounts allocation in MoE TRT-LLM backend (nvfp4) (#5519)
ChristinaZ Jun 27, 2025
6fc1c6f
[fix][ci] correct unittests test prefix (#5547)
omera-nv Jun 27, 2025
cb58073
Fix : fix build for sm120 (#5265)
peaceh-nv Jun 27, 2025
56cdfe5
[TRTLLM-5000][feat] NGrams V2 (#4569)
wili-65535 Jun 27, 2025
833c0de
[TRTLLM-6104] feat: add request_perf_metrics to LLMAPI (#5497)
achartier Jun 27, 2025
a8141a4
refactor: Speculative decoding buffers part 2 (#5316)
Funatiq Jun 27, 2025
5437075
ReDrafter support for Qwen (#4875)
darraghdog Jun 27, 2025
26b953e
[nvbugs/5309940] Add support for input output token counts (#5445)
Tabrizian Jun 27, 2025
5773cfd
feat: Add support for per expert activation scaling factors (#5013)
djns99 Jun 27, 2025
6021a43
Make moe permute and final as custom op (#5412)
limin2021 Jun 27, 2025
619709f
[AutoDeploy] merge feat/ad-2025-06-13 (#5556)
lucaslie Jun 28, 2025
9db769e
[Infra] - Add import pytest (#5565)
EmmaQiaoCh Jun 29, 2025
a985c0b
tests: Move stress tests to be Post-Merge only (#5166)
amirkl94 Jun 29, 2025
de97799
feat: Add support for YARN in NemotronNAS models (#4906)
amirkl94 Jun 29, 2025
70e34a3
[TRTLLM-5831][feat] Add LoRA support for pytorch backend in trtllm-se…
talorabr Jun 29, 2025
a1c1c6b
[CI] reduce mamba2 ssm test parameterization (#5571)
tomeras91 Jun 29, 2025
6000380
perf: Avoid reswizzle_sf after allgather. (#5504)
bobboli Jun 29, 2025
94dc97a
[feat][test] reuse MPI pool executor across tests (#5566)
omera-nv Jun 29, 2025
b4dab23
[TRTLLM-5965] perf: Optimize MoE sort kernels for large-scale EP (#5435)
syuoni Jun 29, 2025
64db7d2
[feat] Optimizations on weight-only batched gemv kernel (#5420)
Njuapp Jun 30, 2025
2780fc2
[ci] remove MMLU if followed by GSM8K (#5578)
omera-nv Jun 30, 2025
578430e
[TRTLLM-5530][BREAKING CHANGE]: enhance the llm args pytorch config p…
nv-guomingz Jun 30, 2025
4fef14d
Deduplicate waive list (#5546)
yiqingy0 Jun 30, 2025
1db63c2
[fix] speedup modeling unittests (#5579)
omera-nv Jun 30, 2025
852b790
feat : support duplicate_kv_weight for qwen3 blockwise scale (#5459)
dongjiyingdjy Jun 30, 2025
42a9385
[TRTLLM-5331] perf: Replace allgaher with AllToAllPrepare (#5570)
WeiHaocheng Jun 30, 2025
2ce200f
doc: Minor update to DeepSeek R1 best practice (#5600)
kaiyux Jun 30, 2025
6cbc9a5
[nvbug/5354946][fix] Fix mtp vanilla draft inputs (#5568)
lfr-0531 Jun 30, 2025
9bdc595
refactor: decoder state setup (#5093)
Funatiq Jun 30, 2025
b8a568d
[Infra][main] Cherry-pick from release/0.21: Update nccl to 2.27.5 (#…
EmmaQiaoCh Jun 30, 2025
38a3977
[TRTLLM-5989, TRTLLM-5991, TRTLLM-5993] doc: Update container instruc…
ixlmar Jun 30, 2025
42134b8
[ci] move eagle1 and medusa tests to post-merge (#5604)
omera-nv Jun 30, 2025
98a7c24
chore [TRTLLM-6009]: remove ptuning knobs from TorchLlmArgs (#5595)
Superjomn Jun 30, 2025
3b19634
[fix][ci] missing class names in post-merge test reports (#5603)
omera-nv Jun 30, 2025
16fc993
refactor: [TRTLLM-6150] Refactor moe permute and finalize op by remov…
limin2021 Jun 30, 2025
6e48ac2
chore: remove cuda_graph_ prefix from cuda_graph_config filed members…
nv-guomingz Jun 30, 2025
f28cd30
feat: AutoDeploy fp8 quantization support for bmm (#3849)
meenchen Jun 30, 2025
6ee94c7
Reintroduce with perf fixes: feature: unify new_tokens format sample …
netanel-haber Jun 30, 2025
7cf1209
[fix]: Fix main test skip issue (#5503)
yizhang-nv Jul 1, 2025
8caaf68
chores: [TRTLLM-6072] 1.0 LLMAPI doc updates (#5629)
hchings Jul 1, 2025
82547f7
add feature support matrix for PyTorch backend (#5037)
QiJune Jul 1, 2025
9b17b29
test: [CI] remove closed bugs (#5572)
xinhe-nv Jul 1, 2025
a8cf611
test: [CI] Add failed cases into waives.txt (#5569)
xinhe-nv Jul 1, 2025
7135b27
rcca: test default kv_cache_reuse option for pytorch multimodal (#5544)
StanleySun639 Jul 1, 2025
34212e2
[TRTLLM-6104] feat: add request_perf_metrics to triton LLMAPI backend…
xuanzic Jul 1, 2025
19c56f0
test: [CI] Add failed cases into waives.txt (#5582)
xinhe-nv Jul 1, 2025
7a617ad
feat: W4A16 GEMM (#4232)
danielafrimi Jul 1, 2025
5f77d21
test: Reduce number of C++ test cases (#5437)
Funatiq Jul 1, 2025
071ad75
[https://nvbugs/5318059][test] Unwaive test (#5624)
pamelap-nvidia Jul 1, 2025
65c2b93
[Infra] - Add some timeout and unwaive a test which dev fixed (#5631)
EmmaQiaoCh Jul 1, 2025
61c5a53
[#5403][perf] Conditionally enable SWAP AB for speculative decoding (…
zoheth Jul 1, 2025
a5eff13
[TRTLLM-5277] chore: refine llmapi examples for 1.0 (part1) (#5431)
Superjomn Jul 1, 2025
872610a
doc: cherry pick #5334 (#5368)
MartinMarciniszyn Jun 19, 2025
61213e3
tests: fix typos in qa test (#5421)
crazydemo Jun 25, 2025
4ef60d5
nvbugs-5331031; nvbugs-5344203 - address intermittent issues with Mis…
brb-nv Jun 25, 2025
3789ba1
feat: TRTLLM-5941 Upgrade xgrammar to 0.1.18 (#5364)
Wanli-Jiang Jun 25, 2025
ded203d
test: set enable_attention_dp=True in default deepseek settings (#5461)
ruodil Jun 25, 2025
be5ddb0
Fix permission for local user issues in NGC docker container. (#5373)
MartinMarciniszyn Jun 25, 2025
ee7fcbf
[nvbug 5273941] fix: broken cyclic reference detect (#5417)
Superjomn Jun 25, 2025
93edfea
[nvbug/5354825] Fix nougat test image url (#5496)
amukkara Jun 26, 2025
4b3f2db
fix: fix regression in LOCAL_USER (#5517)
ixlmar Jun 26, 2025
48eee33
fix: constrain grepping in docker/Makefile (#5493)
ixlmar Jun 26, 2025
178fc3f
[Infra][release/0.21] - waive failed tests (#5537)
EmmaQiaoCh Jun 27, 2025
3bc703d
ci: unwaive llmapi launch test (#5281)
Superjomn Jun 27, 2025
d68fa72
refactor: Clean up DecodingInput and DecodingOutput (#5617)
Funatiq Jul 1, 2025
f9a4556
perf: Use tokenizers API to optimize incremental detokenization perf …
kaiyux Jul 1, 2025
c345f58
[feat] Support torch compile for attention dp (#5086)
liji-nv Jul 1, 2025
fa95e40
feat: add LLmArgs option to force using dynamic quantization (#5346)
achartier Jul 1, 2025
1341ffd
[TRTLLM-5644][infra] Update the community action to more appropriate …
poweiw Jul 1, 2025
efef911
fix: add missing self. from PR #5346 (#5653)
achartier Jul 2, 2025
ba2ab50
[Bug] attention DP doesn't work with embedding TP (#5642)
PerkzZheng Jul 2, 2025
10c5051
fix: Add back allreduce_strategy parameter into TorchLlmArgs (#5637)
HuiGao-NV Jul 2, 2025
7992869
perf: better heuristic for allreduce (#5432)
yilin-void Jul 2, 2025
32dfdfb
feat: fuse w4a8 moe pre-quant scale on Hopper (#5613)
xiaoweiw-nv Jul 2, 2025
caf27ca
[chore] 2025-07-02 update github CI allowlist (#5661)
niukuo Jul 2, 2025
3e75320
Add pd dynamic scaling readme (#5540)
Shunkangz Jul 2, 2025
2d69b55
chore: enhance yaml loading arbitrary options in LlmArgs (#5610)
Superjomn Jul 2, 2025
ca7b6ec
Feat/pytorch vswa kvcachemanager (#5151)
qixiang-99 Jul 2, 2025
4cd8543
[TRTLLM-1316] refactor: Remove unnecessary pipeline parallelism logic…
Funatiq Jul 2, 2025
77082cd
[https://nvbugspro.nvidia.com/bug/5329655] [feat] Pytorch path add sp…
jhaotingc Jul 2, 2025
31699cb
[Infra] - Set default timeout to 1hr and remove some specific setting…
EmmaQiaoCh Jul 2, 2025
04fa6c0
[TRTLLM-6143] feat: Improve dev container tagging (#5551)
ixlmar Jul 2, 2025
afef512
feat:[AutoDeploy] E2E build example for llama4 VLM (#3922)
Fridah-nv Jul 2, 2025
3a46cf2
fix: Fix missing arg to alltoall_prepare_maybe_dispatch (#5669)
syuoni Jul 3, 2025
2a5fdeb
[Infra] - Waive failed tests for main 0702 (#5671)
EmmaQiaoCh Jul 3, 2025
3c9dd5c
chore: bump version to 1.0.0rc2 (#5645)
yiqingy0 Jul 3, 2025
7dbecf7
[TRTLLM-4923][feat] Enable CUDA graphs for Nemotron-H (#5646)
tomeras91 Jul 3, 2025
de0b522
[Infra] - Fix test stage check for the package sanity check stage (#5…
yiqingy0 Jul 3, 2025
5308973
[Infra] - Waive a failed case on main (#5702)
EmmaQiaoCh Jul 3, 2025
dccbfc8
fix: Set init value for moe expert id (#5660)
WeiHaocheng Jul 3, 2025
c728561
[ci] small multigpu speedups (#5643)
omera-nv Jul 3, 2025
f91379b
delete duplicate eagle3 and ngram tests (#5711)
netanel-haber Jul 3, 2025
1a3bd14
chore: Remove unused isFullContextRequest method (#5666)
Funatiq Jul 3, 2025
8dad22c
chore: refine the default value by using pydantic default instead of …
nv-guomingz Jul 3, 2025
2b0c87e
[ModelLoad] Concurrent load model (#5291)
arekay Jul 3, 2025
528ff52
[https://nvbugs/5365714] fix(scaffolding): use default LLM rather tha…
dc3671 Jul 3, 2025
0566fa1
[None][infra] Update the auto-community label action to be triggered …
poweiw Jul 3, 2025
aa72d39
MTP and derivatives: Align sample state with trtllm sampler sample st…
netanel-haber Jul 3, 2025
24ac9b5
[AutoDeploy] merge feat/ad-2025-06-29 (#5737)
lucaslie Jul 4, 2025
7a31952
feat: support more parameters in openai worker of scaffolding (#5115)
ccs96307 Jul 4, 2025
4762e0b
Waive tests : test_openai_lora, test_trtllm_serve_lora_example and te…
venkywonka Jul 4, 2025
7f837b6
tests: waive failures on main (#5704)
xinhe-nv Jul 4, 2025
77288d3
fix [nvbug5351244]: test_mpi_session submit sync/async (#5608)
Superjomn Jun 30, 2025
d0b3d2a
fix:https://nvbugs/5362398 (#5609)
nv-guomingz Jun 30, 2025
cb9f596
[nvbug 5300551] test: increase block count in eviction test (#5465)
zhengd-nv Jul 1, 2025
73d30a2
test: add more tests for GB200 with 8 GPUs/2 nodes in L0 tests (#5397)
yizhang-nv Jul 1, 2025
ab488a5
doc: Fix outdated config in DeepSeek best perf practice doc (#5638)
kaiyux Jul 1, 2025
819ae90
[https://nvbugspro.nvidia.com/bug/5351333][fix] Update to chunking ca…
FrankD412 Jul 2, 2025
cdaa6ab
fix: Investigate Gemma3 1B decoder output discrepancy (#5564)
brb-nv Jul 3, 2025
a0135c0
[Infra] - Waive failed cases on release/0.21 (#5674)
EmmaQiaoCh Jul 3, 2025
94f0252
Doc: Update invalid hugging face URLs (#5683)
Linda-Stadter Jul 3, 2025
134b238
[fix: nvbugs/5355493] Correctly clamp max sequence len to max attenti…
netanel-haber Jul 4, 2025
a79d8c9
Fix none response in PD (#5422)
Shunkangz Jul 4, 2025
32b244a
feat: reduce unnecessary kernel generation (#5476)
tongyuantongyu Jul 4, 2025
c434147
chore: update doc by replacing use_cuda_graph with cuda_graph_config …
nv-guomingz Jul 4, 2025
e134a52
Perf: reduce DeepEPLowLatency memory and time (#5712)
yuantailing Jul 4, 2025
b8fef80
[Infra] - Waive L0 test (#5748)
yiqingy0 Jul 4, 2025
07f9cf1
fix: Improve chunking test and skip empty kernel calls (#5710)
Funatiq Jul 4, 2025
81c0764
Cherry pick "[NVBUG:5355009] Modify check for fuse_fp4_quant on SM120…
farazkh80 Jul 4, 2025
3869b96
test: [CI] Add failed cases into waives.txt (#5718)
xinhe-nv Jul 4, 2025
471bf0b
fix: check file exists in dev container script (#5755)
ixlmar Jul 4, 2025
32339d1
Raise shut down error for each request (#4936)
Shunkangz Jul 4, 2025
7f3ea05
[Infra] - Waive L0 flaky test (#5759)
yiqingy0 Jul 4, 2025
3ed3bbc
Fix: pass allreduce strategy to pytorchConfig (#5746)
HuiGao-NV Jul 4, 2025
ffc0b8f
Cache transceiver support VSWA (#5505)
chuangz0 Jul 4, 2025
d1112aa
[TRTLLM-3442] feat: added beam search support to the PyTorch Workflow…
stnie Jul 4, 2025
d61893d
[fix] Update to properly set cuda graphs in trtllm-bench overrides. (…
FrankD412 Jul 4, 2025
1b588f8
feat: KV events for sliding window attention (#5580)
jthomson04 Jul 4, 2025
089fd55
Add dummy all_reduce for kernel breakdown (#5745)
qiaoxj07 Jul 5, 2025
b1976c2
Add wide-ep benchmarking scripts (#5760)
qiaoxj07 Jul 5, 2025
6bddaf6
chore: Improve documentation of Kv_block_array (#5765)
hypdeb Jul 5, 2025
d95ae13
[Infra] - Always use x86 image for the Jenkins agent and few clean-up…
chzblych Jul 6, 2025
ae27261
refactor: decoding inputs (#5679)
Funatiq Jul 6, 2025
2013034
[Test] - Waive or fix few known test failures (#5769)
chzblych Jul 6, 2025
66f299a
[TRTLLM-5878] add stage for image registration to nspect (#5699)
niukuo Jul 6, 2025
ec6c7df
feat: Add support for MXFP8xMXFP4 in pytorch (#5535)
djns99 Jul 6, 2025
85e934a
[Doc] update the document of qwen3 and cuda_graph usage (#5703)
byshiue Jul 7, 2025
092e0eb
[Infra] - Fix a syntax issue in the image check (#5775)
chzblych Jul 7, 2025
de10774
chore: log stack trace on error in openai server (#5749)
zhengd-nv Jul 7, 2025
9db2e9e
fix: [nvbug/5368507] Fix test_generate_with_seed CI failure. (#5772)
bobboli Jul 7, 2025
12d8c7d
Refactor the topk parallelization part for the routing kernels (#5567)
ChristinaZ Jul 7, 2025
ded38eb
test: [CI] remove closed bugs (#5770)
xinhe-nv Jul 7, 2025
dfce61f
[TRTLLM-5530][BREAKING CHANGE] refactor: LLM arglist rename mixed_sam…
Superjomn Jul 7, 2025
ed1b3c8
fix: Adjust free GPU memory fraction in KvCacheConfig for DeepSeek R1…
yizhang-nv Jul 7, 2025
5ca2b9b
[TRTLLM-5812][feat] support FP8 row-wise dense GEMM in torch flow (#5…
DylanChen-NV Jul 7, 2025
1260e2f
feat: Optimize TRTLLM Sampler perf single beam single step (#5550)
dcampora Jul 7, 2025
85b4a68
Refactor: move DeepEP from Docker images to wheel building (#5534)
yuantailing Jul 7, 2025
30a19fc
[TRTLLM-6291] feat: Add user-provided speculative decoding support (#…
Funatiq Jul 7, 2025
1191555
[ci] speedup fused moe tests (#5726)
omera-nv Jul 7, 2025
a1235ee
[feat] Adds optional module cache for TRT-LLM Gen Gemm interfaces (#5…
davidclark-nv Jul 7, 2025
5a8173c
chore: [Breaking Change] Rename cuda_graph_config padding_enabled fie…
nv-guomingz Jul 8, 2025
5bc3a15
feat: add MultimodalParams & putting all multimodal params into it an…
yechank-nvidia Jul 8, 2025
0be41b6
Revert "chore: [Breaking Change] Rename cuda_graph_config padding_ena…
nv-guomingz Jul 8, 2025
95978e3
[fix] https://nvbugs/5333654 Unwaive to check ci status and improve t…
liji-nv Jul 8, 2025
664bf95
[fix] improve fp4_block_scale_moe_runner type check (#5681)
Alcanderian Jul 8, 2025
b2da16b
feat(scaffolding): add streaming scaffolding_llm.generate_async support
dc3671 Jun 17, 2025
136fa19
feat(scaffolding): yield two tasks in dynasor to eliminate waiting
dc3671 Jun 25, 2025
c033d5b
fix dynasor example print
dc3671 Jun 26, 2025
de9cfba
fix
dc3671 Jul 7, 2025
Note: the diff is too large to display in full; only the first 3000 changed files are loaded.
10 changes: 10 additions & 0 deletions .devcontainer/devcontainer.env
@@ -0,0 +1,10 @@
# Environment variables used to configure the Dev Container setup.
#
# The syntax needs to be compatible with
# https://docs.docker.com/compose/how-tos/environment-variables/variable-interpolation/#env-file-syntax
#
# Edit this file as necessary. For local changes not to be committed back
# to the repository, create/edit devcontainer.env.user instead.
HF_HOME_DEFAULT="${HOME}/.cache/huggingface"
HF_HOME_XDG_DEFAULT="${XDG_CACHE_HOME:-${HF_HOME_DEFAULT}}"
LOCAL_HF_HOME="${HF_HOME:-${HF_HOME_XDG_DEFAULT}}"
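
The three assignments above chain POSIX "${VAR:-default}" fallbacks: LOCAL_HF_HOME resolves to HF_HOME if set, otherwise to XDG_CACHE_HOME, otherwise to ~/.cache/huggingface. A minimal Python sketch of the same resolution order (illustrative only, not part of the change):

import os

# ${VAR:-default} falls back when VAR is unset or empty; `or` mirrors that.
home = os.environ["HOME"]
hf_home_default = f"{home}/.cache/huggingface"
hf_home_xdg_default = os.environ.get("XDG_CACHE_HOME") or hf_home_default
local_hf_home = os.environ.get("HF_HOME") or hf_home_xdg_default
print(local_hf_home)  # e.g. /home/user/.cache/huggingface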
16 changes: 6 additions & 10 deletions .devcontainer/devcontainer.json
@@ -3,24 +3,18 @@
 {
     "name": "TRT-LLM Devcontainer",
     "dockerComposeFile": [
-        "docker-compose.yml"
+        "docker-compose.yml",
+        "docker-compose.override.yml"
     ],
     "service": "tensorrt_llm-dev",
     "remoteUser": "ubuntu",
     "containerEnv": {
-        // "CCACHE_DIR" : "/home/coder/${localWorkspaceFolderBasename}/cpp/.ccache",
-        // "CCACHE_BASEDIR" : "/home/coder/${localWorkspaceFolderBasename}",
         "HF_TOKEN": "${localEnv:HF_TOKEN}",
         "HF_HOME": "/huggingface",
         "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history"
     },
     "workspaceFolder": "/workspaces/tensorrt_llm",
-    // "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
-    // "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
-    "mounts": [
-        "source=${localEnv:HOME}/.cache/huggingface,target=/huggingface,type=bind", // HF cache
-        "source=/home/scratch.trt_llm_data/,target=/home/scratch.trt_llm_data/,type=bind,consistency=consistent"
-    ],
+    "initializeCommand": "cd ${localWorkspaceFolder} && ./.devcontainer/make_env.py",
     // Note: sourcing .profile is required since we use a local user and the python interpreter is
     // global (/usr/bin/python). In this case, pip will default to a local user path which is not
     // by default in the PATH. In interactive devcontainer shells, .profile is sourced by default.
@@ -43,7 +37,9 @@
         // "ms-vscode.cmake-tools",
         // Git & Github
         // "GitHub.vscode-pull-request-github"
-        "eamodio.gitlens"
+        "eamodio.gitlens",
+        // Docs
+        "ms-vscode.live-server"
     ],
     "settings": {
         "C_Cpp.intelliSenseEngine": "disabled",
8 changes: 8 additions & 0 deletions .devcontainer/docker-compose.override-example.yml
@@ -0,0 +1,8 @@
# Example .devcontainer/docker-compose.override.yml
version: "3.9"
services:
  tensorrt_llm-dev:
    volumes:
      # Uncomment the following lines to enable
      # # Mount TRTLLM data volume:
      # - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro
5 changes: 3 additions & 2 deletions .devcontainer/docker-compose.yml
@@ -1,7 +1,7 @@
 version: "3.9"
 services:
   tensorrt_llm-dev:
-    image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506051650-4885
+    image: ${DEV_CONTAINER_IMAGE}
     network_mode: host
     ipc: host

@@ -22,7 +22,8 @@ services:
       capabilities: [gpu]

     volumes:
-      - ..:/workspaces/tensorrt_llm:cached
+      - ${SOURCE_DIR}:/workspaces/tensorrt_llm
+      - ${LOCAL_HF_HOME}:/huggingface # HF cache

     environment:
       - CCACHE_DIR=/workspaces/tensorrt_llm/cpp/.ccache
221 changes: 221 additions & 0 deletions .devcontainer/make_env.py
@@ -0,0 +1,221 @@
#!/usr/bin/env python3

import json
import logging
import os
import re
import shlex
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, List, Optional

JENKINS_PROPS_PATH = Path("jenkins/current_image_tags.properties")
DEV_CONTAINER_ENV_PATH = Path(".devcontainer/devcontainer.env")
DEV_CONTAINER_USER_ENV_PATH = Path(".devcontainer/devcontainer.env.user")
DOT_ENV_PATH = Path(".devcontainer/.env")
COMPOSE_OVERRIDE_PATH = Path(".devcontainer/docker-compose.override.yml")
COMPOSE_OVERRIDE_EXAMPLE_PATH = Path(
    ".devcontainer/docker-compose.override-example.yml")

HOME_DIR_VAR = "HOME_DIR"
SOURCE_DIR_VAR = "SOURCE_DIR"
DEV_CONTAINER_IMAGE_VAR = "DEV_CONTAINER_IMAGE"
BUILD_LOCAL_VAR = "BUILD_LOCAL"
JENKINS_IMAGE_VAR = "LLM_DOCKER_IMAGE"
LOCAL_HF_HOME_VAR = "LOCAL_HF_HOME"

LOGGER = logging.getLogger("make_env")


def _load_env(env_files: List[Path]) -> Dict[str, str]:
    """Evaluate files using 'sh' and return resulting environment."""
    with TemporaryDirectory("trtllm_make_env") as temp_dir:
        json_path = Path(temp_dir) / 'env.json'
        subprocess.run(
            ("(echo set -a && cat " +
             " ".join(shlex.quote(str(env_file)) for env_file in env_files) +
             " && echo && echo exec /usr/bin/env python3 -c \"'import json; import os; print(json.dumps(dict(os.environ)))'\""
             + f") | sh > {json_path}"),
            shell=True,
            check=True,
        )
        with open(json_path, "r") as f:
            env = json.load(f)
    return env


def _detect_rootless() -> bool:
    proc = subprocess.run("./docker/detect_rootless.sh",
                          capture_output=True,
                          check=True,
                          shell=True)
    return bool(int(proc.stdout.decode("utf-8").strip()))


def _handle_rootless(env_inout: Dict[str, str]):
    is_rootless = _detect_rootless()
    if is_rootless:
        LOGGER.info("Docker Rootless Mode detected.")
        if HOME_DIR_VAR not in env_inout:
            raise ValueError(
                "Docker Rootless Mode requires setting HOME_DIR in devcontainer.env.user"
            )
        if SOURCE_DIR_VAR not in env_inout:
            raise ValueError(
                "Docker Rootless Mode requires setting SOURCE_DIR in devcontainer.env.user"
            )

        # Handle HF_HOME
        if "HF_HOME" in os.environ and "HF_HOME" in env_inout:
            raise ValueError(
                "Docker Rootless Mode requires either not setting HF_HOME at all or overriding it in devcontainer.env.user"
            )
        if env_inout[LOCAL_HF_HOME_VAR].startswith(env_inout["HOME"]):
            env_inout[LOCAL_HF_HOME_VAR] = env_inout[LOCAL_HF_HOME_VAR].replace(
                env_inout["HOME"], env_inout[HOME_DIR_VAR], 1)
    else:
        env_inout[HOME_DIR_VAR] = env_inout["HOME"]
        env_inout[SOURCE_DIR_VAR] = os.getcwd()


def _select_prebuilt_image(env: Dict[str, str]) -> Optional[str]:
    # Jenkins image
    candidate_images: List[str] = [env[JENKINS_IMAGE_VAR]]

    # NGC images
    proc = subprocess.run(
        r"git tag --sort=creatordate --merged=HEAD | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | sed -E 's/^v(.*)$/\1/' | tac",
        shell=True,
        capture_output=True,
        check=True,
    )
    # stdout is bytes without text=True; decode before building tag strings.
    for git_tag in proc.stdout.decode("utf-8").splitlines():
        git_tag = git_tag.strip()
        candidate_images.append(f"nvcr.io/nvidia/tensorrt-llm/devel:{git_tag}")

    # Check image availability
    for candidate_image in candidate_images:
        LOGGER.info(f"Trying image {candidate_image}")

        try:
            subprocess.run(
                f"docker run --rm -it --pull=missing --entrypoint=/bin/true {shlex.quote(candidate_image)}",
                check=True,
                shell=True)
        except subprocess.CalledProcessError:
            continue

        LOGGER.info(f"Using image {candidate_image}")
        return candidate_image

    LOGGER.info("No pre-built image found!")
    return None


def _build_local_image() -> str:
    LOGGER.info("Building container image locally")

    with TemporaryDirectory("trtllm_make_env") as temp_dir:
        log_path = Path(temp_dir) / "build.log"
        subprocess.run(
            f"make -C docker devel_build | tee {shlex.quote(str(log_path))}",
            check=True,
            shell=True,
        )
        with open(log_path) as f:
            build_log = f.read()

    # Handle escaped and actual line breaks
    build_log_lines = re.sub(r"\\\n", " ", build_log).splitlines()
    for build_log_line in build_log_lines:
        tokens = shlex.split(build_log_line)
        if tokens[:3] != ["docker", "buildx", "build"]:
            continue
        token = None
        while tokens and not (token := tokens.pop(0)).startswith("--tag"):
            pass
        if token is None:
            continue
        # Accept both the "--tag=IMAGE" and "--tag IMAGE" forms.
        if token.startswith("--tag="):
            token = token.removeprefix("--tag=")
        else:
            if not tokens:
                continue
            token = tokens.pop(0)
        return token  # this is the image URI
    raise RuntimeError(
        f"Could not parse --tag argument from build log: {build_log}")


def _ensure_compose_override():
    if not COMPOSE_OVERRIDE_PATH.exists():
        LOGGER.info(
            f"Creating initial {COMPOSE_OVERRIDE_PATH} from {COMPOSE_OVERRIDE_EXAMPLE_PATH}"
        )
        COMPOSE_OVERRIDE_PATH.write_bytes(
            COMPOSE_OVERRIDE_EXAMPLE_PATH.read_bytes())


def _update_dot_env(env: Dict[str, str]):
    LOGGER.info(f"Updating {DOT_ENV_PATH}")

    output_lines = [
        "# NOTE: This file is generated by make_env.py, modify devcontainer.env.user instead of this file.\n",
        "\n",
    ]

    for env_key, env_value in env.items():
        if os.environ.get(env_key) == env_value:
            # Only storing differences w.r.t. base env
            continue
        output_lines.append(f"{env_key}=\"{shlex.quote(env_value)}\"\n")

    with open(DOT_ENV_PATH, "w") as f:
        f.writelines(output_lines)


def main():
    env_files = [
        JENKINS_PROPS_PATH,
        DEV_CONTAINER_ENV_PATH,
    ]

    if DEV_CONTAINER_USER_ENV_PATH.exists():
        env_files.append(DEV_CONTAINER_USER_ENV_PATH)

    env = _load_env(env_files)
    _handle_rootless(env_inout=env)

    # Determine container image to use
    image_uri = env.get(DEV_CONTAINER_IMAGE_VAR)
    if image_uri:
        LOGGER.info(f"Using user-provided container image: {image_uri}")
    else:
        build_local = bool(int(
            env[BUILD_LOCAL_VAR].strip())) if BUILD_LOCAL_VAR in env else None
        image_uri = None
        if not build_local:
            image_uri = _select_prebuilt_image(env)
        if image_uri is None:
            if build_local is False:
                raise RuntimeError(
                    "No suitable container image found and local build disabled."
                )
            image_uri = _build_local_image()
            LOGGER.info(f"Using locally built container image: {image_uri}")
        env[DEV_CONTAINER_IMAGE_VAR] = image_uri

    _ensure_compose_override()

    _update_dot_env(env)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    try:
        main()
    except Exception as e:
        LOGGER.error(f"{e.__class__.__name__}: {e}")
        sys.exit(-1)
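
This script is run by the "initializeCommand" added to devcontainer.json above: it sources the env files in order under `sh` with `set -a`, so entries in later files (e.g. devcontainer.env.user) override earlier ones, then writes the result to .devcontainer/.env for docker compose. A self-contained sketch of that precedence, using hypothetical values:

import subprocess

# Later assignments win, mirroring how _load_env() sources the Jenkins
# properties, devcontainer.env, then devcontainer.env.user in order.
script = """
set -a
DEV_CONTAINER_IMAGE=jenkins-image:a   # from an earlier file
DEV_CONTAINER_IMAGE=my-image:dev      # from a later file
env
"""
out = subprocess.run(["sh", "-c", script],
                     capture_output=True, text=True, check=True)
print([line for line in out.stdout.splitlines()
       if line.startswith("DEV_CONTAINER_IMAGE=")])
# -> ['DEV_CONTAINER_IMAGE=my-image:dev']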
3 changes: 2 additions & 1 deletion .gitattributes
@@ -1,7 +1,8 @@
 *.a filter=lfs diff=lfs merge=lfs -text
-*.dll filter=lfs diff=lfs merge=lfs -text
 *.lib filter=lfs diff=lfs merge=lfs -text
 *.so filter=lfs diff=lfs merge=lfs -text
+*.dll filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 triton_backend/tools/gpt/input_data.json filter=lfs diff=lfs merge=lfs -text
 *cubin.cpp filter=lfs diff=lfs merge=lfs -text
6 changes: 6 additions & 0 deletions .github/CODEOWNERS
@@ -14,6 +14,12 @@
 /tensorrt_llm/_torch/auto_deploy @NVIDIA/trt-llm-torch-autodeploy-devs
 /tensorrt_llm/examples/auto_deploy @NVIDIA/trt-llm-torch-autodeploy-devs

+## TensorRT-LLM trtllm-bench Reviewers
+/tensorrt_llm/bench @NVIDIA/trtllm-bench-reviewers
+/tensorrt_llm/commands/bench.py @NVIDIA/trtllm-bench-reviewers
+docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
+
+
 # The rule below requires that any PR modifying public APIs must be approved by at least one member
 # of the NVIDIA/trt-llm-committed-api-review-committee or NVIDIA/trt-llm-noncommitted-api-review-committee team.
 # This approval is mandatory regardless of other approvals the PR may have received. Without approval