diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py
index d802bf3a51..2e462b0f6e 100644
--- a/ChatQnA/chatqna.py
+++ b/ChatQnA/chatqna.py
@@ -175,25 +175,23 @@ def align_generator(self, gen, **kwargs):
         # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
         for line in gen:
             line = line.decode("utf-8")
-            start = line.find("{")
-            end = line.rfind("}") + 1
-
-            json_str = line[start:end]
-            try:
-                # sometimes yield empty chunk, do a fallback here
-                json_data = json.loads(json_str)
-                if "ops" in json_data and "op" in json_data["ops"][0]:
-                    if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
-                        yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
-                    else:
-                        pass
-                elif (
-                    json_data["choices"][0]["finish_reason"] != "eos_token"
-                    and "content" in json_data["choices"][0]["delta"]
-                ):
-                    yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
-            except Exception as e:
-                yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
+            chunks = [chunk.strip() for chunk in line.split("\n\n") if chunk.strip()]
+            for line in chunks:
+                start = line.find("{")
+                end = line.rfind("}") + 1
+                json_str = line[start:end]
+                try:
+                    # sometimes yield empty chunk, do a fallback here
+                    json_data = json.loads(json_str)
+                    if "ops" in json_data and "op" in json_data["ops"][0]:
+                        if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+                            yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
+                        else:
+                            pass
+                    elif "content" in json_data["choices"][0]["delta"]:
+                        yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
+                except Exception as e:
+                    yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
         yield "data: [DONE]\n\n"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index f8ac050355..bc9ec9dcb3 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -73,6 +73,17 @@ CPU example with Open Telemetry feature:
 docker compose -f compose.yaml -f compose.telemetry.yaml up -d
 ```
 
+To deploy the ChatQnA services with remote inference endpoints, set the environment variables listed below and use the `compose_remote.yaml` file.
+
+**Note**: Set `REMOTE_ENDPOINT` to the base URL of the remote service, e.g. "https://api.inference.denvrdata.com" when the endpoint to access is "https://api.inference.denvrdata.com/v1/chat/completions".
+
+```bash
+export REMOTE_ENDPOINT=
+export LLM_MODEL_ID=
+export OPENAI_API_KEY=
+docker compose -f compose_remote.yaml up -d
+```
+
 **Note**: developers should build docker image from source when:
 
 - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
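Before bringing up the stack, it can help to confirm that the remote endpoint and API key actually work. The snippet below is a minimal sketch, assuming the remote service exposes an OpenAI-compatible `/v1/chat/completions` route (as the example URL above suggests) and accepts the key as a Bearer token; adjust it to your provider.

```bash
# Optional pre-deployment check against the remote OpenAI-compatible endpoint.
curl -sf "${REMOTE_ENDPOINT}/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ${OPENAI_API_KEY}" \
  -d '{
        "model": "'"${LLM_MODEL_ID}"'",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 16
      }'
```

If this returns a valid completion, the same values can be passed to `compose_remote.yaml` as shown above.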
@@ -147,6 +158,7 @@ In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we
 | File                                             | Description                                                                                                                                                            |
 | ------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | [compose.yaml](./compose.yaml)                   | Default compose file using vllm as serving framework and redis as vector database                                                                                     |
+| [compose_remote.yaml](./compose_remote.yaml)     | Uses remote inference endpoints for LLMs and redis as vector database. All other configurations remain the same as the default                                        |
 | [compose_milvus.yaml](./compose_milvus.yaml)     | Uses Milvus as the vector database. All other configurations remain the same as the default                                                                           |
 | [compose_pinecone.yaml](./compose_pinecone.yaml) | Uses Pinecone as the vector database. All other configurations remain the same as the default. For more details, refer to [README_pinecone.md](./README_pinecone.md). |
 | [compose_qdrant.yaml](./compose_qdrant.yaml)     | Uses Qdrant as the vector database. All other configurations remain the same as the default. For more details, refer to [README_qdrant.md](./README_qdrant.md).       |
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
index a69a420aaa..fbbcb04008 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -102,7 +102,7 @@ services:
       - RERANK_SERVER_HOST_IP=tei-reranking-service
       - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
       - LLM_SERVER_HOST_IP=${REMOTE_ENDPOINT}
-      - OPENAI_API_KEY= ${OPENAI_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - LLM_SERVER_PORT=80
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py
index 6be493b328..af9afdf715 100644
--- a/CodeGen/codegen.py
+++ b/CodeGen/codegen.py
@@ -181,7 +181,6 @@ async def handle_request(self, request: Request):
 
         # Handle the chat messages to generate the prompt
         prompt = handle_message(chat_request.messages)
-
         # Get the agents flag from the request data, default to False if not provided
         agents_flag = data.get("agents_flag", False)
 
@@ -200,7 +199,6 @@ async def handle_request(self, request: Request):
 
         # Initialize the initial inputs with the generated prompt
         initial_inputs = {"query": prompt}
-
         # Check if the key index name is provided in the parameters
         if parameters.index_name:
             if agents_flag:
@@ -268,7 +266,6 @@ async def handle_request(self, request: Request):
         result_dict, runtime_graph = await megaservice.schedule(
             initial_inputs=initial_inputs, llm_parameters=parameters
         )
-
         for node, response in result_dict.items():
             # Check if the last microservice in the megaservice is LLM
             if (
@@ -277,7 +274,6 @@ async def handle_request(self, request: Request):
                 and megaservice.services[node].service_type == ServiceType.LLM
             ):
                 return response
-
         # Get the response from the last node in the runtime graph
         last_node = runtime_graph.all_leaves()[-1]
 
@@ -288,7 +284,6 @@ async def handle_request(self, request: Request):
             response = result_dict[last_node]["text"]
         except (KeyError, TypeError):
             response = "Response Error"
-
         choices = []
         usage = UsageInfo()
         choices.append(
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 0b129c2c8e..fe62853240 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -91,11 +91,27 @@ Different Docker Compose files are available to select the LLM serving backend.
 - **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine.
 - **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
 - **To Run:**
+
   ```bash
   # Ensure environment variables (HOST_IP, HF_TOKEN) are set
   docker compose -f compose_tgi.yaml up -d
   ```
 
+#### Deployment with remote endpoints (`compose_remote.yaml`)
+
+- **Compose File:** `compose_remote.yaml`
+- **Description:** Uses remote endpoints to access the served LLMs. The configuration matches the default except for the LLM serving engine.
+- **Services Deployed:** `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`.
+- **To Run:**
+
+  ```bash
+  export OPENAI_API_KEY=
+  export REMOTE_ENDPOINT= # do not include /v1
+  export LLM_MODEL_ID=
+
+  docker compose -f compose_remote.yaml up -d
+  ```
+
 ### Configuration Parameters
 
 #### Environment Variables
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
index 23b8af1959..fca95ec8ec 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -6,6 +6,9 @@ services:
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
+    depends_on:
+      dataprep-redis-server:
+        condition: service_healthy
     ports:
       - "7778:7778"
     environment:
@@ -14,7 +17,8 @@ services:
       - http_proxy=${http_proxy}
       - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LLM_SERVICE_HOST_IP=${REMOTE_ENDPOINT}
-      - OPENAI_API_KEY= ${OPENAI_API_KEY}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
       - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
       - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
       - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
@@ -61,6 +65,11 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
       LOGFLAG: true
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
     restart: unless-stopped
   tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md
index 23af6bf96b..8c2c7728aa 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/README.md
+++ b/DocSum/docker_compose/intel/cpu/xeon/README.md
@@ -52,6 +52,18 @@ cd intel/cpu/xeon/
 docker compose up -d
 ```
 
+To deploy the DocSum services with remote inference endpoints, set the environment variables listed below and use the `compose_remote.yaml` file.
+
+**Note**: Set `LLM_ENDPOINT` to the base URL of the remote service, e.g. "https://api.inference.denvrdata.com" when the endpoint to access is "https://api.inference.denvrdata.com/v1/chat/completions".
+
+```bash
+export LLM_ENDPOINT=
+export LLM_MODEL_ID=
+export OPENAI_API_KEY=
+
+docker compose -f compose_remote.yaml up -d
+```
+
 **Note**: developers should build docker image from source when:
 
 - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
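The CodeGen and DocSum remote deployments above are easy to start with one of the exported variables accidentally left empty, and the failure then only surfaces once a container tries to reach the remote service. A small optional pre-flight check, shown here as a sketch for the DocSum variables (swap `LLM_ENDPOINT` for `REMOTE_ENDPOINT` in the CodeGen case), catches that earlier:

```bash
# Optional guard: abort the deployment if a required variable is unset or empty.
for var in LLM_ENDPOINT LLM_MODEL_ID OPENAI_API_KEY; do
  if [ -z "${!var}" ]; then
    echo "ERROR: ${var} is not set" >&2
    exit 1
  fi
done
docker compose -f compose_remote.yaml up -d
```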
@@ -113,10 +125,11 @@ All the DocSum containers will be stopped and then removed on completion of the
 
 In the context of deploying a DocSum pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application.
 
-| File                                    | Description                                                                                |
-| --------------------------------------- | ------------------------------------------------------------------------------------------ |
-| [compose.yaml](./compose.yaml)          | Default compose file using vllm as serving framework                                       |
-| [compose_tgi.yaml](./compose_tgi.yaml)  | The LLM serving framework is TGI. All other configurations remain the same as the default  |
+| File                                         | Description                                                                                         |
+| -------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
+| [compose.yaml](./compose.yaml)               | Default compose file using vllm as serving framework                                               |
+| [compose_tgi.yaml](./compose_tgi.yaml)       | The LLM serving framework is TGI. All other configurations remain the same as the default          |
+| [compose_remote.yaml](./compose_remote.yaml) | Uses remote inference endpoints for LLMs. All other configurations remain the same as the default  |
 
 ## DocSum Detailed Usage
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml
new file mode 100644
index 0000000000..4282878ba9
--- /dev/null
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose_remote.yaml
@@ -0,0 +1,73 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  llm-docsum-vllm:
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+    container_name: docsum-xeon-llm-server
+    ports:
+      - ${LLM_PORT:-9000}:9000
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      OPENAI_API_KEY: ${OPENAI_API_KEY}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
+    restart: unless-stopped
+
+  whisper:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: docsum-xeon-whisper-server
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+
+  docsum-xeon-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-xeon-backend-server
+    depends_on:
+      - llm-docsum-vllm
+    ports:
+      - "${BACKEND_SERVICE_PORT:-8888}:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
+    ipc: host
+    restart: always
+
+  docsum-gradio-ui:
+    image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
+    container_name: docsum-xeon-ui-server
+    depends_on:
+      - docsum-xeon-backend-server
+    ports:
+      - "${FRONTEND_SERVICE_PORT:-5173}:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
+      - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git
a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml new file mode 100644 index 0000000000..1ee0a41672 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -0,0 +1,283 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.3" + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + DATAPREP_TYPE: ${DATAPREP_TYPE} + LOGFLAG: ${LOGFLAG} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 50 + restart: unless-stopped + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data_embedding}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 60 + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7001:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${RERANK_MODEL_ID} --auto-truncate" + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "${MODEL_CACHE:-./data_tei}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 60 + + chatqna-xeon-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-xeon-backend-server + depends_on: + redis-vector-db: + condition: service_started + tei-embedding-service: + condition: service_healthy + dataprep-redis-service: + condition: service_healthy + 
retriever: + condition: service_started + tei-reranking-service: + condition: service_healthy + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: chatqna-xeon-backend-server + EMBEDDING_SERVER_HOST_IP: tei-embedding-service + EMBEDDING_SERVER_PORT: ${EMBEDDING_SERVER_PORT:-80} + RETRIEVER_SERVICE_HOST_IP: retriever + RERANK_SERVER_HOST_IP: tei-reranking-service + RERANK_SERVER_PORT: ${RERANK_SERVER_PORT:-80} + LLM_SERVER_HOST_IP: ${REMOTE_ENDPOINT} + OPENAI_API_KEY: ${OPENAI_API_KEY} + LLM_SERVER_PORT: ${LLM_SERVER_PORT:-80} + LLM_MODEL: ${LLM_MODEL_ID} + LOGFLAG: ${LOGFLAG} + ipc: host + restart: always + + codegen-xeon-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-xeon-backend-server + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${host_ip} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPENAI_API_KEY: ${OPENAI_API_KEY} + LLM_SERVICE_HOST_IP: ${REMOTE_ENDPOINT} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN:-9000} + LOGFLAG: ${LOGFLAG} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST:-mongo} + MONGO_PORT: ${MONGO_PORT:-27017} + COLLECTION_NAME: ${COLLECTION_NAME:-Conversations} + LOGFLAG: ${LOGFLAG} + restart: unless-stopped + + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST:-mongo} + MONGO_PORT: ${MONGO_PORT:-27017} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME:-prompt} + LOGFLAG: ${LOGFLAG} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8080:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + + productivity-suite-xeon-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-xeon-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - 
APP_PROMPT_SERVICE_DELETE_ENDPOINT=${PROMPT_SERVICE_DELETE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + - LOGFLAG=${LOGFLAG} + ipc: host + restart: always + + llm-docsum-tgi: + image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest} + container_name: docsum-xeon-llm-server + ports: + - ${LLM_PORT_DOCSUM:-9003}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${REMOTE_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPENAI_API_KEY: ${OPENAI_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-1024} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-2048} + DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + docsum-xeon-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-xeon-backend-server + depends_on: + - llm-docsum-tgi + ports: + - "${BACKEND_SERVICE_PORT_DOCSUM:-8890}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${host_ip} + - LLM_SERVICE_HOST_IP=llm-docsum-tgi + - LLM_SERVICE_PORT=9000 + - ASR_SERVICE_HOST_IP=whisper + - OTEL_SDK_DISABLED=true + - OTEL_TRACES_EXPORTER=none + ipc: host + restart: always + whisper: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + container_name: whisper-server + ports: + - "7066:7066" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped +networks: + default: + driver: bridge diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh index f88f72e444..a636cd46a1 100755 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/set_env.sh @@ -25,7 +25,7 @@ export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" export PROMPT_SERVICE_DELETE_ENDPOINT="http://${host_ip}:6018/v1/prompt/delete" export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" -export DocSum_COMPONENT_NAME="OpeaDocSumTgi" +export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" #Set no proxy export no_proxy="$no_proxy,tgi_service_codegen,llm_codegen,tei-embedding-service,tei-reranking-service,chatqna-xeon-backend-server,retriever,tgi-service,redis-vector-db,whisper,llm-docsum-tgi,docsum-xeon-backend-server,mongo,codegen"
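Once the ProductivitySuite stack is up, the same health endpoints that the Compose healthchecks poll can double as a quick smoke test from the deployment host. This is a minimal sketch built from the port mappings published above (6007 for dataprep, 6006 for TEI embedding, 8808 for TEI reranking), with `host_ip` pointing at the deployment host, as in `set_env.sh`:

```bash
# Smoke-test a few services through their published ports, reusing the health
# paths that compose_remote.yaml's healthchecks poll inside the containers.
curl -sf "http://${host_ip}:6007/v1/health_check" && echo "dataprep: OK"
curl -sf "http://${host_ip}:6006/health" && echo "tei-embedding: OK"
curl -sf "http://${host_ip}:8808/health" && echo "tei-reranking: OK"
```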