2 changes: 1 addition & 1 deletion .github/workflows/integration-auth-tests.yml
@@ -86,7 +86,7 @@ jobs:

# avoid line breaks in the server log, especially because we grep it below.
export COLUMNS=1984
-nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
+nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &

- name: Wait for Llama Stack server to be ready
run: |
2 changes: 1 addition & 1 deletion .github/workflows/test-external-provider-module.yml
@@ -59,7 +59,7 @@ jobs:
# Use the virtual environment created by the build step (name comes from build config)
source ramalama-stack-test/bin/activate
uv pip list
-nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &

- name: Wait for Llama Stack server to be ready
run: |
2 changes: 1 addition & 1 deletion .github/workflows/test-external.yml
@@ -59,7 +59,7 @@ jobs:
# Use the virtual environment created by the build step (name comes from build config)
source ci-test/bin/activate
uv pip list
-nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &

- name: Wait for Llama Stack server to be ready
run: |
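All three workflow edits make the same change: `--image-type venv` disappears from the `nohup ... llama stack run` launch line, and the server now runs with whatever environment the job has already activated. Each file's next step then waits for the server to come up. That step's body is truncated above, but a readiness gate along these lines is the usual pattern (a sketch only; the port and the `/v1/health` path are assumptions, and the real workflows may grep `server.log` instead):

```bash
# Poll until the server answers, for at most ~60 seconds; fail the job otherwise.
for i in {1..30}; do
  if curl -sSf http://localhost:8321/v1/health > /dev/null 2>&1; then
    echo "Llama Stack server is ready"
    break
  fi
  if [ "$i" -eq 30 ]; then
    echo "Server failed to start; last log lines:"
    tail -n 50 server.log
    exit 1
  fi
  sleep 2
done
```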
2 changes: 1 addition & 1 deletion docs/docs/advanced_apis/post_training.mdx
@@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:

```bash
llama stack build --distro starter --image-type venv
-llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
+llama stack run ~/.llama/distributions/starter/starter-run.yaml
```
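Because `run` no longer accepts `--image-type`, the virtual environment created by the build step has to be active in the shell. A sketch of the adjusted flow (the `source` path is an assumption; the CI workflows above activate a venv named after the build config, e.g. `source ci-test/bin/activate`):

```bash
llama stack build --distro starter --image-type venv
# Activate the venv the build created; "starter" is an assumed venv name here.
source starter/bin/activate
llama stack run ~/.llama/distributions/starter/starter-run.yaml
```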

### Usage Example
11 changes: 4 additions & 7 deletions docs/docs/distributions/building_distro.mdx
@@ -322,20 +322,20 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
[--image-type {venv}] [--enable-ui]
-[config | template]
+[config | distro]

Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

positional arguments:
-config | template  Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)
+config | distro    Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

options:
-h, --help show this help message and exit
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
--image-name IMAGE_NAME
-Name of the image to run. Defaults to the current environment (default: None)
+[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--image-type {venv}
-Image Type used during the build. This should be venv. (default: None)
+[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--enable-ui Start the UI server (default: False)
```

@@ -347,9 +347,6 @@ llama stack run tgi

# Start using config file
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

-# Start using a venv
-llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```
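Passing the removed flags now fails fast instead of silently changing behavior. Based on the `parser.error` message added in `llama_stack/cli/stack/run.py` further down, the interaction looks roughly like this (argparse also prints a usage line first):

```bash
llama stack run my-local-stack-run.yaml --image-type venv
# Expected failure, with a message along these lines:
#   The --image-type and --image-name flags are no longer supported.
#   Please activate your virtual environment manually before running `llama stack run`.
#   For example:
#     source /path/to/venv/bin/activate
#     llama stack run <config>
```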

4 changes: 2 additions & 2 deletions docs/getting_started.ipynb
@@ -123,12 +123,12 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
"!uv run --with llama-stack llama stack build --distro together --image-type venv\n",
"!uv run --with llama-stack llama stack build --distro together\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" \"uv run --with llama-stack llama stack run together --image-type venv\",\n",
" \"uv run --with llama-stack llama stack run together\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
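The notebook launches the server through `subprocess.Popen(..., shell=True)`, so the edit reduces to the shell command itself. Outside a notebook, the equivalent background launch is simply (a sketch; the log file name matches the notebook, and the PID bookkeeping is an addition for later cleanup):

```bash
uv run --with llama-stack llama stack run together > llama_stack_server.log 2>&1 &
SERVER_PID=$!   # keep the PID so the server can be stopped later with: kill $SERVER_PID
```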
4 changes: 2 additions & 2 deletions docs/getting_started_llama4.ipynb
@@ -233,12 +233,12 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server\n",
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n",
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n",
" f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
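Here the model is threaded through the `INFERENCE_MODEL` environment variable rather than a CLI flag, so that part survives the flag removal unchanged. Shell-side, the launch reduces to something like the following (a sketch; the model id is illustrative only):

```bash
INFERENCE_MODEL="meta-llama/Llama-4-Scout-17B-16E-Instruct" \
  uv run --with llama-stack llama stack run meta-reference-gpu > llama_stack_server.log 2>&1 &
```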
4 changes: 2 additions & 2 deletions docs/getting_started_llama_api.ipynb
@@ -223,12 +223,12 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server\n",
"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n",
"!uv run --with llama-stack llama stack build --distro llama_api\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
" \"uv run --with llama-stack llama stack run llama_api\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
4 changes: 2 additions & 2 deletions docs/quick_start.ipynb
@@ -145,12 +145,12 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
"!uv run --with llama-stack llama stack build --distro starter\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
3 changes: 1 addition & 2 deletions docs/zero_to_hero_guide/README.md
@@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
...
Build Successful!
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
-You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
+You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
```

3. **Set the ENV variables by exporting them to the terminal**:
@@ -106,7 +106,6 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
SAFETY_MODEL=$SAFETY_MODEL \
OLLAMA_URL=$OLLAMA_URL \
uv run --with llama-stack llama stack run starter \
-  --image-type venv \
   --port $LLAMA_STACK_PORT
```
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
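In practice that restart cycle looks roughly like this (a sketch; the model tag is illustrative):

```bash
# Terminal 1: load a new model into ollama
ollama run llama3.2:3b-instruct-fp16 --keepalive 60m
# Terminal 2: stop the running stack (Ctrl+C), then relaunch so it picks up the model
uv run --with llama-stack llama stack run starter --port $LLAMA_STACK_PORT
```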
24 changes: 18 additions & 6 deletions llama_stack/cli/stack/_build.py
@@ -444,12 +444,24 @@ def _run_stack_build_command_from_build_config(

cprint("Build Successful!", color="green", file=sys.stderr)
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
cprint(
"You can run the new Llama Stack distro via: "
+ colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
color="green",
file=sys.stderr,
)
if build_config.image_type == LlamaStackImageType.VENV:
cprint(
"You can run the new Llama Stack distro (after activating "
+ colored(image_name, "cyan")
+ ") via: "
+ colored(f"llama stack run {run_config_file}", "blue"),
color="green",
file=sys.stderr,
)
elif build_config.image_type == LlamaStackImageType.CONTAINER:
cprint(
"You can run the container with: "
+ colored(
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
),
color="green",
file=sys.stderr,
)
return distro_path
else:
return _generate_run_config(build_config, build_dir, image_name)
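For container builds, the hint added above publishes the server's default port and bind-mounts `~/.llama` so the run config and model artifacts are visible inside the container. Spelled out with a placeholder image name:

```bash
# "my-distro" stands in for the image name chosen at build time.
docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/my-distro --port 8321
```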
36 changes: 12 additions & 24 deletions llama_stack/cli/stack/run.py
@@ -55,12 +55,12 @@ def _add_arguments(self):
"--image-name",
type=str,
default=None,
help="Name of the image to run. Defaults to the current environment",
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
)
self.parser.add_argument(
"--image-type",
type=str,
help="Image Type used during the build. This can be only venv.",
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
)
self.parser.add_argument(
@@ -73,11 +73,18 @@ def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import yaml
 
         from llama_stack.core.configure import parse_and_maybe_upgrade_config
-        from llama_stack.core.utils.exec import formulate_run_args, run_command
 
+        if args.image_type or args.image_name:
+            self.parser.error(
+                "The --image-type and --image-name flags are no longer supported.\n\n"
+                "Please activate your virtual environment manually before running `llama stack run`.\n\n"
+                "For example:\n"
+                "  source /path/to/venv/bin/activate\n"
+                "  llama stack run <config>\n"
+            )
 
         if args.enable_ui:
             self._start_ui_development_server(args.port)
-        image_type, image_name = args.image_type, args.image_name
 
         if args.config:
             try:
@@ -89,10 +89,6 @@ def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         else:
             config_file = None
 
-        # Check if config is required based on image type
-        if image_type == ImageType.VENV.value and not config_file:
-            self.parser.error("Config file is required for venv environment")
-
         if config_file:
             logger.info(f"Using run configuration: {config_file}")

@@ -107,23 +110,8 @@ def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
                     os.makedirs(str(config.external_providers_dir), exist_ok=True)
             except AttributeError as e:
                 self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
-        else:
-            config = None
 
-        # If neither image type nor image name is provided, assume the server should be run directly
-        # using the current environment packages.
-        if not image_type and not image_name:
-            logger.info("No image type or image name provided. Assuming environment packages.")
-            self._uvicorn_run(config_file, args)
-        else:
-            run_args = formulate_run_args(image_type, image_name)
-
-            run_args.extend([str(args.port)])
-
-            if config_file:
-                run_args.extend(["--config", str(config_file)])
-
-            run_command(run_args)
+        self._uvicorn_run(config_file, args)
 
     def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
         if not config_file:
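With the `formulate_run_args`/`run_command` branch deleted, `llama stack run` always starts uvicorn in the current process, so everything it needs must come from the active environment. A minimal invocation under the new flow (a sketch; the help text above documents the `LLAMA_STACK_PORT` variable):

```bash
source /path/to/venv/bin/activate   # any environment with llama-stack installed
LLAMA_STACK_PORT=8321 llama stack run ~/.llama/distributions/starter/starter-run.yaml
```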
2 changes: 1 addition & 1 deletion scripts/integration-tests.sh
@@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
echo "Llama Stack Server is already running, skipping start"
else
echo "=== Starting Llama Stack Server ==="
-nohup llama stack run ci-tests --image-type venv > server.log 2>&1 &
+nohup llama stack run ci-tests > server.log 2>&1 &

echo "Waiting for Llama Stack Server to start..."
for i in {1..30}; do