From 9328077628cba2a5d3a7e1fb7e5d824b351bd4ad Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Tue, 1 Apr 2025 12:51:58 -0400
Subject: [PATCH 1/8] add docker build doc for intel cpu/xpu/hpu

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/custom_container.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/docs/source/en/custom_container.md b/docs/source/en/custom_container.md
index ab2913c4..448b6714 100644
--- a/docs/source/en/custom_container.md
+++ b/docs/source/en/custom_container.md
@@ -41,3 +41,27 @@ runtime_compute_cap=80

 docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
 ```
+
+To build a CPU container with Intel® Extension for PyTorch acceleration, run the following command:
+
+```shell
+platform="cpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_cpu_ipex
+```
+
+To build an XPU container with Intel® Extension for PyTorch acceleration, run the following command:
+
+```shell
+platform="xpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_xpu_ipex
+```
+
+To build an HPU (Gaudi) container, run the following command:
+
+```shell
+platform="hpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_hpu
+```
\ No newline at end of file

From bc7ab25ace00713e949140c9c434c7d0911786e6 Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Tue, 1 Apr 2025 12:54:27 -0400
Subject: [PATCH 2/8] add `einops` pip install to enable model
 `nomic-ai/nomic-embed-text-v1.5`

Signed-off-by: Liu, Kaixuan
---
 backends/python/server/requirements-intel.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/python/server/requirements-intel.txt b/backends/python/server/requirements-intel.txt
index 36b330db..5accea69 100644
--- a/backends/python/server/requirements-intel.txt
+++ b/backends/python/server/requirements-intel.txt
@@ -42,3 +42,4 @@ win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and
 wrapt==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.40.0 ; python_version >= "3.9" and python_version < "3.13"
 pyrsistent==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
+einops==0.8.0 ; python_version >= "3.9" and python_version < "3.13"
\ No newline at end of file

From afbecdc404efbd2ff4a526e0e9b98b23cb4ea4ab Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Tue, 1 Apr 2025 13:23:14 -0400
Subject: [PATCH 3/8] add doc in `quick tour`

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/quick_tour.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/docs/source/en/quick_tour.md b/docs/source/en/quick_tour.md
index 6fd94f0a..3f69c672 100644
--- a/docs/source/en/quick_tour.md
+++ b/docs/source/en/quick_tour.md
@@ -36,6 +36,30 @@ volume=$PWD/data

 docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
 ```
+To deploy your model on an Intel® CPU, run the following command:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest --model-id $model
+```
+
+To deploy your model on an Intel® XPU, run the following command:
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --pull always ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest --model-id $model --dtype float16
+```
+
+To deploy your model on an Intel® HPU (Gaudi), run the following command:
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 ghcr.io/huggingface/text-embeddings-inference:hpu-latest --model-id $model --dtype bfloat16
+```

 We also recommend sharing a volume with the Docker container (`volume=$PWD/data`) to avoid downloading weights every run.

From 8c5caa5ce1174730817a01a2cb18e0b100046d35 Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Wed, 2 Apr 2025 20:01:12 -0400
Subject: [PATCH 4/8] move related doc to a separate doc file

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/custom_container.md | 24 ------------------------
 docs/source/en/local_intel.md      |  0
 docs/source/en/quick_tour.md       | 24 ------------------------
 3 files changed, 48 deletions(-)
 create mode 100644 docs/source/en/local_intel.md

diff --git a/docs/source/en/custom_container.md b/docs/source/en/custom_container.md
index 448b6714..ab2913c4 100644
--- a/docs/source/en/custom_container.md
+++ b/docs/source/en/custom_container.md
@@ -41,27 +41,3 @@ runtime_compute_cap=80

 docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
 ```
-
-To build a CPU container with Intel® Extension for PyTorch acceleration, run the following command:
-
-```shell
-platform="cpu"
-
-docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_cpu_ipex
-```
-
-To build an XPU container with Intel® Extension for PyTorch acceleration, run the following command:
-
-```shell
-platform="xpu"
-
-docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_xpu_ipex
-```
-
-To build an HPU (Gaudi) container, run the following command:
-
-```shell
-platform="hpu"
-
-docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_hpu
-```
\ No newline at end of file
diff --git a/docs/source/en/local_intel.md b/docs/source/en/local_intel.md
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/source/en/quick_tour.md b/docs/source/en/quick_tour.md
index 3f69c672..6fd94f0a 100644
--- a/docs/source/en/quick_tour.md
+++ b/docs/source/en/quick_tour.md
@@ -36,30 +36,6 @@ volume=$PWD/data

 docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:1.6 --model-id $model
 ```
-To deploy your model on an Intel® CPU, run the following command:
-
-```shell
-model='BAAI/bge-large-en-v1.5'
-volume=$PWD/data
-
-docker run -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest --model-id $model
-```
-
-To deploy your model on an Intel® XPU, run the following command:
-```shell
-model='BAAI/bge-large-en-v1.5'
-volume=$PWD/data
-
-docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --pull always ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest --model-id $model --dtype float16
-```
-
-To deploy your model on an Intel® HPU (Gaudi), run the following command:
-```shell
-model='BAAI/bge-large-en-v1.5'
-volume=$PWD/data
-
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 ghcr.io/huggingface/text-embeddings-inference:hpu-latest --model-id $model --dtype bfloat16
-```

 We also recommend sharing a volume with the Docker container (`volume=$PWD/data`) to avoid downloading weights every run.

From 235bc655000a9c86c3db6b6651036418275cdd28 Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Wed, 2 Apr 2025 20:03:14 -0400
Subject: [PATCH 5/8] add content to local_intel.md

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/local_intel.md | 88 +++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/docs/source/en/local_intel.md b/docs/source/en/local_intel.md
index e69de29b..2b149f1b 100644
--- a/docs/source/en/local_intel.md
+++ b/docs/source/en/local_intel.md
@@ -0,0 +1,88 @@
+<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Using TEI locally with Intel® Hardware
+
+This guide explains how to build and deploy `text-embeddings-inference` containers optimized for Intel® hardware, including CPUs, XPUs, and HPUs.
+
+## Building Docker Images
+
+### Build a CPU Container with Intel® Extension for PyTorch Acceleration
+
+To build a container optimized for Intel® CPUs, run the following command:
+
+```shell
+platform="cpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_cpu_ipex
+```
+
+### Build an XPU Container with Intel® Extension for PyTorch Acceleration
+
+To build a container optimized for Intel® XPUs, run the following command:
+
+```shell
+platform="xpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_xpu_ipex
+```
+
+### Build an HPU (Gaudi) Container
+
+To build a container optimized for Intel® HPUs (Gaudi), run the following command:
+
+```shell
+platform="hpu"
+
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_hpu
+```
+
+## Deploying Docker Containers
+
+### Deploy on Intel® CPU
+
+To deploy your model on an Intel® CPU, use the following command:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest --model-id $model
+```
+
+### Deploy on Intel® XPU
+
+To deploy your model on an Intel® XPU, use the following command:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --pull always ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest --model-id $model --dtype float16
+```
+
+### Deploy on HPU (Gaudi)
+
+To deploy your model on an Intel® HPU (Gaudi), use the following command:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 ghcr.io/huggingface/text-embeddings-inference:hpu-latest --model-id $model --dtype bfloat16
+```
+
+Now you are ready to use `text-embeddings-inference` locally with Intel® hardware.
\ No newline at end of file

From 89bf27b49e62328e090a28727703ee9e8b6e453e Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Wed, 2 Apr 2025 20:11:43 -0400
Subject: [PATCH 6/8] adjust

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/local_intel.md | 46 ++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/docs/source/en/local_intel.md b/docs/source/en/local_intel.md
index 2b149f1b..314951ba 100644
--- a/docs/source/en/local_intel.md
+++ b/docs/source/en/local_intel.md
@@ -18,9 +18,9 @@ rendered properly in your Markdown viewer.

 This guide explains how to build and deploy `text-embeddings-inference` containers optimized for Intel® hardware, including CPUs, XPUs, and HPUs.

-## Building Docker Images
+## CPU

-### Build a CPU Container with Intel® Extension for PyTorch Acceleration
+### Build Docker Container

 To build a container optimized for Intel® CPUs, run the following command:

@@ -30,51 +30,53 @@ platform="cpu"

 docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_cpu_ipex
 ```

-### Build an XPU Container with Intel® Extension for PyTorch Acceleration
+### Deploy Docker Container

-To build a container optimized for Intel® XPUs, run the following command:
+To deploy your model on an Intel® CPU, use the following command:

 ```shell
-platform="xpu"
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data

-docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_xpu_ipex
+docker run -p 8080:80 -v $volume:/data tei_cpu_ipex --model-id $model
 ```

-### Build an HPU (Gaudi) Container
+## XPU

-To build a container optimized for Intel® HPUs (Gaudi), run the following command:
+### Build Docker Container
+
+To build a container optimized for Intel® XPUs, run the following command:

 ```shell
-platform="hpu"
+platform="xpu"

-docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_hpu
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_xpu_ipex
 ```

-## Deploying Docker Containers
-
-### Deploy on Intel® CPU
+### Deploy Docker Container

-To deploy your model on an Intel® CPU, use the following command:
+To deploy your model on an Intel® XPU, use the following command:

 ```shell
 model='BAAI/bge-large-en-v1.5'
 volume=$PWD/data

-docker run -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest --model-id $model
+docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path tei_xpu_ipex --model-id $model --dtype float16
 ```

-### Deploy on Intel® XPU
+## HPU

-To deploy your model on an Intel® XPU, use the following command:
+### Build Docker Container
+
+To build a container optimized for Intel® HPUs (Gaudi), run the following command:

 ```shell
-model='BAAI/bge-large-en-v1.5'
-volume=$PWD/data
+platform="hpu"

-docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --pull always ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest --model-id $model --dtype float16
+docker build . -f Dockerfile-intel --build-arg PLATFORM=$platform -t tei_hpu
 ```

-### Deploy on HPU (Gaudi)
+### Deploy Docker Container

 To deploy your model on an Intel® HPU (Gaudi), use the following command:

@@ -82,7 +84,7 @@ model='BAAI/bge-large-en-v1.5'
 model='BAAI/bge-large-en-v1.5'
 volume=$PWD/data

-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 ghcr.io/huggingface/text-embeddings-inference:hpu-latest --model-id $model --dtype bfloat16
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 tei_hpu --model-id $model --dtype bfloat16
 ```

 Now you are ready to use `text-embeddings-inference` locally with Intel® hardware.
\ No newline at end of file

From 0e24100a579ce3acd755c8773741802618b648d3 Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Wed, 2 Apr 2025 20:36:20 -0400
Subject: [PATCH 7/8] rename

Signed-off-by: Liu, Kaixuan
---
 .../en/{local_intel.md => intel_container.md} | 55 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 5 deletions(-)
 rename docs/source/en/{local_intel.md => intel_container.md} (75%)

diff --git a/docs/source/en/local_intel.md b/docs/source/en/intel_container.md
similarity index 75%
rename from docs/source/en/local_intel.md
rename to docs/source/en/intel_container.md
index 314951ba..f260fb4e 100644
--- a/docs/source/en/local_intel.md
+++ b/docs/source/en/intel_container.md
@@ -14,13 +14,13 @@ rendered properly in your Markdown viewer.

 -->

-# Using TEI locally with Intel® Hardware
+# Using TEI Container with Intel® Hardware

 This guide explains how to build and deploy `text-embeddings-inference` containers optimized for Intel® hardware, including CPUs, XPUs, and HPUs.

 ## CPU

-### Build Docker Container
+### Build Docker Image

 To build a container optimized for Intel® CPUs, run the following command:

@@ -43,7 +43,7 @@ docker run -p 8080:80 -v $volume:/data tei_cpu_ipex --model-id $model

 ## XPU

-### Build Docker Container
+### Build Docker Image

 To build a container optimized for Intel® XPUs, run the following command:

@@ -66,7 +66,7 @@ docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/de

 ## HPU

-### Build Docker Container
+### Build Docker Image

 To build a container optimized for Intel® HPUs (Gaudi), run the following command:

@@ -87,4 +87,49 @@ volume=$PWD/data

 docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e MAX_WARMUP_SEQUENCE_LENGTH=512 tei_hpu --model-id $model --dtype bfloat16
 ```

-Now you are ready to use `text-embeddings-inference` locally with Intel® hardware.
\ No newline at end of file
+## Prebuilt Docker Images
+
+For convenience, prebuilt Docker images are available on GitHub Container Registry (GHCR). You can pull these images directly instead of building them manually:
+
+### CPU
+To use the prebuilt image optimized for Intel® CPUs, run:
+```shell
+docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest
+```
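+
+For example, you can start a pulled CPU image and send it a test request. The snippet below is a minimal sketch: it reuses the `model` and `volume` variables from the deployment sections above, assumes local port 8080 is free, and queries the server's `/embed` route once the model has finished loading:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+# start the server in the background and map it to local port 8080
+docker run -d -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-embeddings-inference:cpu-ipex-latest --model-id $model
+
+# wait for the model to download and load (check `docker logs`), then request embeddings
+curl 127.0.0.1:8080/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```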
+
+### XPU
+To use the prebuilt image optimized for Intel® XPUs, run:
+```shell
+docker pull ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest
+```
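+
+The pulled image can then be run in place of the locally built `tei_xpu_ipex` tag, with the same device flags as in the XPU deployment section above, for example:
+
+```shell
+model='BAAI/bge-large-en-v1.5'
+volume=$PWD/data
+
+docker run -p 8080:80 -v $volume:/data --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path ghcr.io/huggingface/text-embeddings-inference:xpu-ipex-latest --model-id $model --dtype float16
+```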
+
+### HPU
+To use the prebuilt image optimized for Intel® HPUs (Gaudi), run:
+```shell
+docker pull ghcr.io/huggingface/text-embeddings-inference:hpu-latest
+```
\ No newline at end of file

From da36d9cbac1b834eb977fca0eae5f1ad5c2e1c4a Mon Sep 17 00:00:00 2001
From: "Liu, Kaixuan"
Date: Thu, 3 Apr 2025 13:49:21 -0400
Subject: [PATCH 8/8] link to toctree file

Signed-off-by: Liu, Kaixuan
---
 docs/source/en/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 211d1ca5..68f40ada 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -19,6 +19,8 @@
 #   title: Using TEI CLI
   - local: custom_container
     title: Build custom container for TEI
+  - local: intel_container
+    title: Using TEI container with Intel Hardware
   - local: examples
     title: Example uses
   title: Tutorials