diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index d802bf3a51..2e462b0f6e 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -175,25 +175,23 @@ def align_generator(self, gen, **kwargs):
         # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
         for line in gen:
             line = line.decode("utf-8")
-            start = line.find("{")
-            end = line.rfind("}") + 1
-
-            json_str = line[start:end]
-            try:
-                # sometimes yield empty chunk, do a fallback here
-                json_data = json.loads(json_str)
-                if "ops" in json_data and "op" in json_data["ops"][0]:
-                    if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
-                        yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
-                    else:
-                        pass
-                elif (
-                    json_data["choices"][0]["finish_reason"] != "eos_token"
-                    and "content" in json_data["choices"][0]["delta"]
-                ):
-                    yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
-            except Exception as e:
-                yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
+            chunks = [chunk.strip() for chunk in line.split("\n\n") if chunk.strip()]
+            for line in chunks:
+                start = line.find("{")
+                end = line.rfind("}") + 1
+                json_str = line[start:end]
+                try:
+                    # sometimes yield empty chunk, do a fallback here
+                    json_data = json.loads(json_str)
+                    if "ops" in json_data and "op" in json_data["ops"][0]:
+                        if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+                            yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
+                        else:
+                            pass
+                    elif "content" in json_data["choices"][0]["delta"]:
+                        yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
+                except Exception as e:
+                    yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
         yield "data: [DONE]\n\n"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index f8ac050355..bc9ec9dcb3 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -73,6 +73,17 @@ CPU example with Open Telemetry feature:
 docker compose -f compose.yaml -f compose.telemetry.yaml up -d
 ```
 
+To deploy the ChatQnA services with remote inference endpoints, set the environment variables listed below and use the `compose_remote.yaml` file.
+
+**Note**: Set `REMOTE_ENDPOINT` to the base URL of the remote service, e.g. "https://api.inference.denvrdata.com" when the endpoint to access is "https://api.inference.denvrdata.com/v1/chat/completions".
+
+```bash
+export REMOTE_ENDPOINT=
+export LLM_MODEL_ID=
+export OPENAI_API_KEY=
+docker compose -f compose_remote.yaml up -d
+```
+
 **Note**: developers should build docker image from source when:
 
 - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
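Before bringing up the stack, it can help to confirm that the remote endpoint and API key actually work. The snippet below is a minimal sketch, assuming the remote service exposes an OpenAI-compatible `/v1/chat/completions` route (as the example URL above suggests) and accepts the key as a Bearer token; adjust it to your provider.

```bash
# Optional pre-deployment check against the remote OpenAI-compatible endpoint.
curl -sf "${REMOTE_ENDPOINT}/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ${OPENAI_API_KEY}" \
  -d '{
        "model": "'"${LLM_MODEL_ID}"'",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 16
      }'
```

If this returns a valid completion, the same values can be passed to `compose_remote.yaml` as shown above.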
@@ -147,6 +158,7 @@ In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we
 | File                                             | Description                                                                                                                                                            |
 | ------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | [compose.yaml](./compose.yaml)                   | Default compose file using vllm as serving framework and redis as vector database                                                                                     |
+| [compose_remote.yaml](./compose_remote.yaml)     | Uses remote inference endpoints for LLMs and redis as vector database. All other configurations remain the same as the default                                        |
 | [compose_milvus.yaml](./compose_milvus.yaml)     | Uses Milvus as the vector database. All other configurations remain the same as the default                                                                           |
 | [compose_pinecone.yaml](./compose_pinecone.yaml) | Uses Pinecone as the vector database. All other configurations remain the same as the default. For more details, refer to [README_pinecone.md](./README_pinecone.md). |
 | [compose_qdrant.yaml](./compose_qdrant.yaml)     | Uses Qdrant as the vector database. All other configurations remain the same as the default. For more details, refer to [README_qdrant.md](./README_qdrant.md).       |
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
index a69a420aaa..fbbcb04008 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -102,7 +102,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=${REMOTE_ENDPOINT}
-      - OPENAI_API_KEY= ${OPENAI_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py
index 6be493b328..af9afdf715 100644
--- a/CodeGen/codegen.py
+++ b/CodeGen/codegen.py
@@ -181,7 +181,6 @@ async def handle_request(self, request: Request):
 
         # Handle the chat messages to generate the prompt
         prompt = handle_message(chat_request.messages)
-
         # Get the agents flag from the request data, default to False if not provided
         agents_flag = data.get("agents_flag", False)
 
@@ -200,7 +199,6 @@ async def handle_request(self, request: Request):
 
         # Initialize the initial inputs with the generated prompt
         initial_inputs = {"query": prompt}
-
         # Check if the key index name is provided in the parameters
         if parameters.index_name:
             if agents_flag:
@@ -268,7 +266,6 @@ async def handle_request(self, request: Request):
         result_dict, runtime_graph = await megaservice.schedule(
             initial_inputs=initial_inputs, llm_parameters=parameters
         )
-
         for node, response in result_dict.items():
             # Check if the last microservice in the megaservice is LLM
             if (
@@ -277,7 +274,6 @@ async def handle_request(self, request: Request):
                 and megaservice.services[node].service_type == ServiceType.LLM
             ):
                 return response
-
         # Get the response from the last node in the runtime graph
         last_node = runtime_graph.all_leaves()[-1]
 
@@ -288,7 +284,6 @@ async def handle_request(self, request: Request):
             response = result_dict[last_node]["text"]
         except (KeyError, TypeError):
             response = "Response Error"
-
         choices = []
         usage = UsageInfo()
         choices.append(
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 0b129c2c8e..fe62853240 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -91,11 +91,27 @@ Different Docker Compose files are available to select the LLM serving backend.
 - **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
 - **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
 - **To Run:**
+
   ```bash
   # Ensure environment variables (HOST_IP, HF_TOKEN) are set
   docker compose -f compose_tgi.yaml up -d
   ```
 
+#### Deployment with remote endpoints (`compose_remote.yaml`)
+
+- **Compose File:** `compose_remote.yaml`
+- **Description:** Uses remote endpoints to access the served LLMs. The configuration matches the default except for the LLM serving engine.
+- **Services Deployed:** `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
+- **To Run:**
+
+  ```bash
+  export OPENAI_API_KEY=
+  export REMOTE_ENDPOINT= # do not include /v1
+  export LLM_MODEL_ID=
+
+  docker compose -f compose_remote.yaml up -d
+  ```
+
 ### Configuration Parameters
 
 #### Environment Variables
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
index 23b8af1959..fca95ec8ec 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -6,6 +6,9 @@ services:
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
+    depends_on:
+      dataprep-redis-server:
+        condition: service_healthy
     ports:
       - "7778:7778"
     environment:
@@ -14,7 +17,8 @@ services:
       - http_proxy=${http_proxy}
       - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${REMOTE_ENDPOINT}
-      - OPENAI_API_KEY= ${OPENAI_API_KEY}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
       - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
       - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
@@ -61,6 +65,11 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
       LOGFLAG: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
     restart: unless-stopped
   tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md
index 23af6bf96b..8c2c7728aa 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/README.md
+++ b/DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -52,6 +52,18 @@ cd intel/cpu/xeon/
 docker compose up -d
 ```
 
+To deploy the DocSum services with remote inference endpoints, set the environment variables listed below and use the `compose_remote.yaml` file.
+
+**Note**: Set `LLM_ENDPOINT` to the base URL of the remote service, e.g. "https://api.inference.denvrdata.com" when the endpoint to access is "https://api.inference.denvrdata.com/v1/chat/completions".
+
+```bash
+export LLM_ENDPOINT=
+export LLM_MODEL_ID=
+export OPENAI_API_KEY=
+
+docker compose -f compose_remote.yaml up -d
+```
+
 **Note**: developers should build docker image from source when:
 
 - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
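The CodeGen and DocSum remote deployments above are easy to start with one of the exported variables accidentally left empty, and the failure then only surfaces once a container tries to reach the remote service. A small optional pre-flight check, shown here as a sketch for the DocSum variables (swap `LLM_ENDPOINT` for `REMOTE_ENDPOINT` in the CodeGen case), catches that earlier:

```bash
# Optional guard: abort the deployment if a required variable is unset or empty.
for var in LLM_ENDPOINT LLM_MODEL_ID OPENAI_API_KEY; do
  if [ -z "${!var}" ]; then
    echo "ERROR: ${var} is not set" >&2
    exit 1
  fi
done
docker compose -f compose_remote.yaml up -d
```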
@@ -113,10 +125,11 @@ All the DocSum containers will be stopped and then removed on completion of the
 
 In the context of deploying a DocSum pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application.
 
-| File                                    | Description                                                                                |
-| --------------------------------------- | ------------------------------------------------------------------------------------------ |
-| [compose.yaml](./compose.yaml)          | Default compose file using vllm as serving framework                                       |
-| [compose_tgi.yaml](./compose_tgi.yaml)  | The LLM serving framework is TGI. All other configurations remain the same as the default  |
+| File                                         | Description                                                                                         |
+| -------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml)               | Default compose file using vllm as serving framework                                               |
+| [compose_tgi.yaml](./compose_tgi.yaml)       | The LLM serving framework is TGI. All other configurations remain the same as the default          |
+| [compose_remote.yaml](./compose_remote.yaml) | Uses remote inference endpoints for LLMs. All other configurations remain the same as the default  |
 
 ## DocSum Detailed Usage
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml
new file mode 100644
index 0000000000..4282878ba9
--- /dev/null
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -0,0 +1,73 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  llm-docsum-vllm:
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+    container_name: docsum-xeon-llm-server
+    ports:
+      - ${LLM_PORT:-9000}:9000
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      OPENAI_API_KEY: ${OPENAI_API_KEY}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
+    restart: unless-stopped
+
+  whisper:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: docsum-xeon-whisper-server
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+
+  docsum-xeon-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-xeon-backend-server
+    depends_on:
+      - llm-docsum-vllm
+    ports:
+      - "${BACKEND_SERVICE_PORT:-8888}:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
+    ipc: host
+    restart: always
+
+  docsum-gradio-ui:
+    image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
+    container_name: docsum-xeon-ui-server
+    depends_on:
+      - docsum-xeon-backend-server
+    ports:
+      - "${FRONTEND_SERVICE_PORT:-5173}:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
+      - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git
a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml new file mode 100644 index 0000000000..1ee0a41672 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -0,0 +1,283 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.3" + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + DATAPREP_TYPE: ${DATAPREP_TYPE} + LOGFLAG: ${LOGFLAG} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 50 + restart: unless-stopped + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data_embedding}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 60 + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7001:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${RERANK_MODEL_ID} --auto-truncate" + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "${MODEL_CACHE:-./data_tei}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 60 + + chatqna-xeon-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-xeon-backend-server + depends_on: + redis-vector-db: + condition: service_started + tei-embedding-service: + condition: service_healthy + dataprep-redis-service: + condition: service_healthy + 
retriever: + condition: service_started + tei-reranking-service: + condition: service_healthy + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: chatqna-xeon-backend-server + EMBEDDING_SERVER_HOST_IP: tei-embedding-service + EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80} + RETRIEVER_SERVICE_HOST_IP: retriever + RERANK_SERVER_HOST_IP: tei-reranking-service + RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80} + LLM_SERVER_HOST_IP: ${REMOTE_ENDPOINT} + OPENAI_API_KEY: ${OPENAI_API_KEY} + LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80} + LLM_MODEL: ${LLM_MODEL_ID} + LOGFLAG: ${LOGFLAG} + ipc: host + restart: always + + codegen-xeon-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-xeon-backend-server + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${host_ip} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPENAI_API_KEY: ${OPENAI_API_KEY} + LLM_SERVICE_HOST_IP: ${REMOTE_ENDPOINT} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN:-9000} + LOGFLAG: ${LOGFLAG} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST:-mongo} + MONGO_PORT: ${MONGO_PORT:-27017} + COLLECTION_NAME: ${COLLECTION_NAME:-Conversations} + LOGFLAG: ${LOGFLAG} + restart: unless-stopped + + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST:-mongo} + MONGO_PORT: ${MONGO_PORT:-27017} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME:-prompt} + LOGFLAG: ${LOGFLAG} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8080:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + + productivity-suite-xeon-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-xeon-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - 
APP_PROMPT_SERVICE_DELETE_ENDPOINT=${PROMPT_SERVICE_DELETE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + - LOGFLAG=${LOGFLAG} + ipc: host + restart: always + + llm-docsum-tgi: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-xeon-llm-server + ports: + - ${LLM_PORT_DOCSUM:-9003}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${REMOTE_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPENAI_API_KEY: ${OPENAI_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-1024} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-2048} + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + docsum-xeon-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-xeon-backend-server + depends_on: + - llm-docsum-tgi + ports: + - "${BACKEND_SERVICE_PORT_DOCSUM:-8890}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${host_ip} + - LLM_SERVICE_HOST_IP=llm-docsum-tgi + - LLM_SERVICE_PORT=9000 + - ASR_SERVICE_HOST_IP=whisper + - OTEL_SDK_DISABLED=true + - OTEL_TRACES_EXPORTER=none + ipc: host + restart: always + whisper: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-server + ports: + - "7066:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped +networks: + default: + driver: bridge diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh index f88f72e444..a636cd46a1 100755 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh @@ -25,7 +25,7 @@ export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" export PROMPT_SERVICE_DELETE_ENDPOINT="http://${host_ip}:6018/v1/prompt/delete" export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" #Set no proxy export no_proxy="$no_proxy,tgi_service_codegen,llm_codegen,tei-embedding-service,tei-reranking-service,chatqna-xeon-backend-server,retriever,tgi-service,redis-vector-db,whisper,llm-docsum-tgi,docsum-xeon-backend-server,mongo,codegen"
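Once the ProductivitySuite stack is up, the same health endpoints that the Compose healthchecks poll can double as a quick smoke test from the deployment host. This is a minimal sketch built from the port mappings published above (6007 for dataprep, 6006 for TEI embedding, 8808 for TEI reranking), with `host_ip` pointing at the deployment host, as in `set_env.sh`:

```bash
# Smoke-test a few services through their published ports, reusing the health
# paths that compose_remote.yaml's healthchecks poll inside the containers.
curl -sf "http://${host_ip}:6007/v1/health_check" && echo "dataprep: OK"
curl -sf "http://${host_ip}:6006/health" && echo "tei-embedding: OK"
curl -sf "http://${host_ip}:8808/health" && echo "tei-reranking: OK"
```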