From 335856bbfa634c17827c825ac418c2894f364a13 Mon Sep 17 00:00:00 2001
From: EmmonsCurse <1577972691@qq.com>
Date: Thu, 14 Aug 2025 20:08:13 +0800
Subject: [PATCH] [CI] fix run_ci error in release/2.0.4

---
 .github/workflows/ci.yml                      | 26 ++++++++++++++++---
 scripts/run_ci.sh                             |  6 ++---
 test/ci_use/EB_Lite/test_EB_Lite_serving.py   |  1 +
 .../EB_VL_Lite/test_EB_VL_Lite_serving.py     | 10 +++----
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 518b15eb99..ae47330500 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -67,9 +67,29 @@ jobs:
             gpu_id=0
             DEVICES="0,1"
           fi
-          FD_API_PORT=$((9180 + gpu_id * 100))
-          FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
-          FD_METRICS_PORT=$((9170 + gpu_id * 100))
+          # Service ports for this job; each one is offset by 100 per gpu_id.
+          FLASK_PORT=$((41068 + gpu_id * 100))
+          FD_API_PORT=$((41088 + gpu_id * 100))
+          FD_ENGINE_QUEUE_PORT=$((41058 + gpu_id * 100))
+          FD_METRICS_PORT=$((41078 + gpu_id * 100))
+
+          PORTS=("$FLASK_PORT" "$FD_API_PORT" "$FD_ENGINE_QUEUE_PORT" "$FD_METRICS_PORT")
+          LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
+          echo "==== LOG_FILE is ${LOG_FILE} ===="
+
+          echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a "$LOG_FILE"
+          # Force-kill whatever still holds these ports; a PID that exits before kill runs must not abort the step.
+          for port in "${PORTS[@]}"; do
+              PIDS=$(lsof -t -i ":$port" || true)
+              if [ -n "$PIDS" ]; then
+                  echo "Port $port is occupied by PID(s): $PIDS" | tee -a "$LOG_FILE"
+                  if echo "$PIDS" | xargs -r kill -9; then result="cleared"; else result="not fully cleared (kill failed)"; fi
+                  echo "Port $port $result" | tee -a "$LOG_FILE"
+              else
+                  echo "Port $port is free" | tee -a "$LOG_FILE"
+              fi
+          done
+          echo "==== PORT CLEAN COMPLETE ====" | tee -a "$LOG_FILE"
 
           PARENT_DIR=$(dirname "$WORKSPACE")
           echo "PARENT_DIR:$PARENT_DIR"
diff --git a/scripts/run_ci.sh b/scripts/run_ci.sh
index 7d77bccb45..3548083e95 100644
--- a/scripts/run_ci.sh
+++ b/scripts/run_ci.sh
@@ -3,7 +3,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 echo "$DIR"
 
 python -m pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
-python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
+python -m pip install paddlepaddle-gpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
 python -m pip install -r requirements.txt
 python -m pip install jsonschema aistudio_sdk==0.3.5
 bash build.sh || exit 1
@@ -24,7 +24,7 @@ for subdir in "$run_path"*/; do
                 echo "------------------------------------------------------------"
 
                 set +e
-                timeout 360 python -m pytest --disable-warnings -sv "$file"
+                timeout 600 python -m pytest --disable-warnings -sv "$file"
                 exit_code=$?
                 set -e
 
@@ -44,7 +44,7 @@ for subdir in "$run_path"*/; do
                     if [ "$exit_code" -eq 1 ] || [ "$exit_code" -eq 124 ]; then
                         echo "[ERROR] $file 起服务或执行异常，exit_code=$exit_code"
                         if [ "$exit_code" -eq 124 ]; then
-                            echo "[TIMEOUT] $file 脚本执行超过 6 分钟, 任务超时退出！"
+                            echo "[TIMEOUT] $file 脚本执行超过 10 分钟, 任务超时退出！"
                         fi
                     fi
 
diff --git a/test/ci_use/EB_Lite/test_EB_Lite_serving.py b/test/ci_use/EB_Lite/test_EB_Lite_serving.py
index 56f00f6e86..685b9c651e 100644
--- a/test/ci_use/EB_Lite/test_EB_Lite_serving.py
+++ b/test/ci_use/EB_Lite/test_EB_Lite_serving.py
@@ -707,6 +707,7 @@ def test_non_streaming_chat_with_min_tokens(openai_client, capsys):
         model="default",
         messages=[{"role": "user", "content": "Hello, how are you?"}],
         temperature=1,
+        max_tokens=1010,
         extra_body={"min_tokens": min_tokens},
         stream=False,
     )
diff --git a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
index f6aa2b424a..1442e7d3e2 100644
--- a/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
+++ b/test/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
@@ -129,14 +129,14 @@ def setup_and_run_server():
             start_new_session=True,  # Enables killing full group via os.killpg
         )
 
-    # Wait up to 300 seconds for API server to be ready
-    for _ in range(300):
+    # Wait up to 600 seconds for API server to be ready
+    for _ in range(600):
         if is_port_open("127.0.0.1", FD_API_PORT):
             print(f"API server is up on port {FD_API_PORT}")
             break
         time.sleep(1)
     else:
-        print("[TIMEOUT] API server failed to start in 5 minutes. Cleaning up...")
+        print("[TIMEOUT] API server failed to start in 10 minutes. Cleaning up...")
         try:
             os.killpg(process.pid, signal.SIGTERM)
         except Exception as e:
@@ -229,9 +229,9 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
     # base result
     base_path = os.getenv("MODEL_PATH")
     if base_path:
-        base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2")
+        base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2-204")
     else:
-        base_file = "ernie-4_5-vl-base-tp2"
+        base_file = "ernie-4_5-vl-base-tp2-204"
     with open(base_file, "r") as f:
         content2 = f.read()