From eb2eea4e096cc16c0833b7a1237c51272dbab51e Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Wed, 20 Aug 2025 20:43:09 +0000 Subject: [PATCH 1/2] widep --- components/backends/sglang/docs/multinode-examples.md | 2 +- components/backends/sglang/slurm_jobs/scripts/h100.sh | 2 +- container/Dockerfile.sglang-wideep | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/backends/sglang/docs/multinode-examples.md b/components/backends/sglang/docs/multinode-examples.md index dade00cb11..7030f9f1e3 100644 --- a/components/backends/sglang/docs/multinode-examples.md +++ b/components/backends/sglang/docs/multinode-examples.md @@ -78,7 +78,7 @@ python3 -m dynamo.sglang \ Node 4: Run the remaining 8 shards of the decode worker ```bash -python3 -m dynamo.sglang.decode_worker \ +python3 -m dynamo.sglang \ --model-path /model/ \ --served-model-name deepseek-ai/DeepSeek-R1 \ --tp 16 \ diff --git a/components/backends/sglang/slurm_jobs/scripts/h100.sh b/components/backends/sglang/slurm_jobs/scripts/h100.sh index 5ba4fd95c6..f2fba771ed 100755 --- a/components/backends/sglang/slurm_jobs/scripts/h100.sh +++ b/components/backends/sglang/slurm_jobs/scripts/h100.sh @@ -156,7 +156,7 @@ elif [ "$mode" = "decode" ]; then --deepep-mode low_latency \ --mem-fraction-static 0.835 \ --ep-num-redundant-experts 32 \ - --cuda-graph-bs 256 + --cuda-graph-bs 128 elif [ "$cmd" = "sglang" ]; then # H100 sglang decode command python3 -m sglang.launch_server \ diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep index bcd89348f7..ba23e5a876 100644 --- a/container/Dockerfile.sglang-wideep +++ b/container/Dockerfile.sglang-wideep @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG SGLANG_IMAGE_TAG="v0.5.0rc0-cu126" +ARG SGLANG_IMAGE_TAG="v0.5.0rc2-cu126" FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG} @@ -97,7 +97,7 @@ RUN cd dynamo/lib/bindings/python && \ cd /sgl-workspace/dynamo && \ pip install --break-system-packages . -RUN pip install --break-system-packages sglang-router==0.1.5 +RUN pip install --break-system-packages sglang-router==0.1.9 RUN wget --tries=3 --waitretry=5 \ https://github.com/nats-io/nats-server/releases/download/v2.10.28/\ From e49b7478f690954c991f9052a3986c8e4b118331 Mon Sep 17 00:00:00 2001 From: ishandhanani Date: Wed, 20 Aug 2025 21:25:34 +0000 Subject: [PATCH 2/2] bump --- components/backends/sglang/slurm_jobs/scripts/worker_setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/backends/sglang/slurm_jobs/scripts/worker_setup.py b/components/backends/sglang/slurm_jobs/scripts/worker_setup.py index cfe2aaa634..c6a0a5529c 100644 --- a/components/backends/sglang/slurm_jobs/scripts/worker_setup.py +++ b/components/backends/sglang/slurm_jobs/scripts/worker_setup.py @@ -261,7 +261,7 @@ def setup_head_prefill_node(prefill_host_ip: str) -> None: logging.info(f"Starting ingress server on node {prefill_host_ip}") ingress_process = run_command( - "dynamo run in=http out=dyn --http-port=8000", background=True + "python3 -m dynamo.frontend --http-port=8000", background=True ) if not ingress_process: raise RuntimeError("Failed to start ingress")