Fix vllm version issue #2012

Open · wants to merge 8 commits into main
2 changes: 1 addition & 1 deletion .github/env/_build_image.sh
@@ -2,4 +2,4 @@
# SPDX-License-Identifier: Apache-2.0

export VLLM_VER=v0.8.3
-export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+export VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
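For context, VLLM_VER pins the upstream vLLM tag and VLLM_FORK_VER pins the HabanaAI fork tag used for Gaudi builds. A minimal sketch of how the fork tag is consumed, mirroring the checkout pattern in the test scripts below (the path to the env file is illustrative):

#!/usr/bin/env bash
# Sketch only: source the shared CI env file, then check out the pinned fork tag.
set -euo pipefail
source .github/env/_build_image.sh          # exports VLLM_VER and VLLM_FORK_VER
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork
git checkout "${VLLM_FORK_VER}"             # e.g. v0.7.2+Gaudi-1.21.0
cd ..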
4 changes: 2 additions & 2 deletions AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -131,8 +131,8 @@ services:
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8086/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
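Note on the relaxed healthcheck: with interval: 100s, timeout: 100s, and retries: 100, Compose allows roughly 100 checks at 100-second intervals (close to 3 hours) before marking the vLLM container unhealthy, versus roughly 17 minutes with the previous 10-second settings, which gives the Gaudi server more headroom for model download and warm-up. A rough way to watch readiness by hand, assuming the same endpoint the healthcheck probes (host_ip and port 8086 come from the test line above):

# Sketch: poll the same /health endpoint the compose healthcheck uses.
until curl -sf "http://${host_ip}:8086/health" > /dev/null; do
  echo "vllm service not ready yet, retrying in 10s..."
  sleep 10
done
echo "vllm service is healthy"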
2 changes: 1 addition & 1 deletion AgentQnA/tests/step1_build_images.sh
@@ -41,7 +41,7 @@ function build_agent_docker_image_gaudi_vllm() {
get_genai_comps

git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build agent image with --no-cache..."
4 changes: 2 additions & 2 deletions AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -55,8 +55,8 @@ services:
LLM_SERVER_PORT: ${LLM_SERVER_PORT}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
2 changes: 1 addition & 1 deletion AudioQnA/tests/test_compose_on_gaudi.sh
@@ -27,7 +27,7 @@ function build_docker_images() {

git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork/
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
echo "Check out vLLM tag ${VLLM_FORK_VER}"
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

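The same pinned tag now appears in .github/env/_build_image.sh and in each per-example test script. A possible way to keep these from drifting in the future (a suggestion, not part of this change) is to source the shared env file and only fall back to a hard-coded default:

# Hypothetical consolidation (not in this PR): reuse the shared CI env file when
# available, otherwise fall back to the pinned tag. The relative path is illustrative.
if [ -f ../../.github/env/_build_image.sh ]; then
    source ../../.github/env/_build_image.sh
fi
VLLM_FORK_VER=${VLLM_FORK_VER:-v0.7.2+Gaudi-1.21.0}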
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -109,8 +109,8 @@ services:
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
@@ -55,8 +55,8 @@ services:
command: --model ${GURADRAILS_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8088/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 150
guardrails:
image: ${REGISTRY:-opea}/guardrails:${TAG:-latest}
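The guardrails service runs the same vLLM image with a different model, passed through the container command shown above. For reference, roughly the same invocation outside Compose would look like the sketch below; the image name, published port mapping, and Habana device variable are assumptions, not values taken from this diff:

# Sketch: run the vLLM OpenAI-compatible server with the flags from the compose
# command above. Image name and device env var are assumptions.
docker run --rm --runtime=habana -e HABANA_VISIBLE_DEVICES=all -p 8088:80 \
  opea/vllm-gaudi:latest \
  --model "${GURADRAILS_MODEL_ID}" --tensor-parallel-size "${NUM_CARDS}" \
  --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 \
  --max-seq-len-to-capture 2048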
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_faqgen_on_gaudi.sh
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_guardrails_on_gaudi.sh
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_on_gaudi.sh
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
2 changes: 1 addition & 1 deletion ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh
@@ -24,7 +24,7 @@ function build_docker_images() {
docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
popd && sleep 1s
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
4 changes: 2 additions & 2 deletions CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -54,8 +54,8 @@ services:
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
2 changes: 1 addition & 1 deletion CodeGen/tests/test_compose_on_gaudi.sh
@@ -27,7 +27,7 @@ function build_docker_images() {

# Download Gaudi vllm of latest tag
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
echo "Check out vLLM tag ${VLLM_FORK_VER}"
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

4 changes: 2 additions & 2 deletions CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -21,8 +21,8 @@ services:
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
2 changes: 1 addition & 1 deletion CodeTrans/tests/test_compose_on_gaudi.sh
@@ -26,7 +26,7 @@ function build_docker_images() {
popd && sleep 1s

git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
4 changes: 2 additions & 2 deletions DocSum/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -22,8 +22,8 @@ services:
VLLM_TORCH_PROFILER_DIR: "/mnt"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
-interval: 10s
-timeout: 10s
+interval: 100s
+timeout: 100s
retries: 100
runtime: habana
cap_add:
2 changes: 1 addition & 1 deletion DocSum/tests/test_compose_on_gaudi.sh
@@ -50,7 +50,7 @@ function build_docker_images() {
popd && sleep 1s

git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
2 changes: 1 addition & 1 deletion FinanceAgent/tests/test_compose_on_gaudi.sh
@@ -96,7 +96,7 @@ function build_vllm_docker_image() {
fi
cd ./vllm-fork

-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null
docker build --no-cache -f Dockerfile.hpu -t $VLLM_IMAGE --shm-size=128g . --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY
if [ $? -ne 0 ]; then
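Once the image builds successfully, $VLLM_IMAGE is what the Gaudi compose files later run with runtime: habana. A quick manual smoke test might look like the sketch below; the model id, host port, and device env var are placeholders rather than values from this diff:

# Sketch: start the freshly built image and wait for its /health endpoint.
docker run -d --rm --name vllm-smoke --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all -p 8008:80 \
  "${VLLM_IMAGE}" --model "${LLM_MODEL_ID}" --host 0.0.0.0 --port 80
until curl -sf http://localhost:8008/health > /dev/null; do sleep 10; done
echo "vllm image is serving"
docker stop vllm-smoke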
2 changes: 1 addition & 1 deletion VisualQnA/tests/test_compose_on_gaudi.sh
@@ -27,7 +27,7 @@ function build_docker_images() {
popd && sleep 1s

git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork
-VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0
+VLLM_FORK_VER=v0.7.2+Gaudi-1.21.0
git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../

service_list="visualqna visualqna-ui lvm nginx vllm-gaudi"