From 86160325fa71e86bd74b34b764f7cb66d3249564 Mon Sep 17 00:00:00 2001 From: louie-tsai Date: Wed, 9 Apr 2025 01:40:10 +0000 Subject: [PATCH] add Gaudi support for TEI Embedding service in ChatQnA Signed-off-by: Tsai, Louie --- ChatQnA/README.md | 7 +++++ .../docker_compose/intel/hpu/gaudi/README.md | 6 ++--- .../gaudi/compose.tei-embedding-gaudi.yaml | 26 +++++++++++++++++++ .../intel/hpu/gaudi/compose.yaml | 2 +- .../intel/hpu/gaudi/compose_faqgen.yaml | 2 +- .../intel/hpu/gaudi/compose_faqgen_tgi.yaml | 2 +- .../intel/hpu/gaudi/compose_guardrails.yaml | 2 +- .../intel/hpu/gaudi/compose_tgi.yaml | 2 +- .../hpu/gaudi/compose_without_rerank.yaml | 2 +- .../hpu/gaudi/how_to_validate_service.md | 2 +- .../intel/hpu/gaudi/prometheus.yaml | 2 +- ChatQnA/tests/test_compose_faqgen_on_gaudi.sh | 2 +- .../tests/test_compose_faqgen_tgi_on_gaudi.sh | 2 +- .../tests/test_compose_guardrails_on_gaudi.sh | 2 +- ChatQnA/tests/test_compose_on_gaudi.sh | 2 +- ChatQnA/tests/test_compose_tgi_on_gaudi.sh | 2 +- .../test_compose_without_rerank_on_gaudi.sh | 2 +- 17 files changed, 50 insertions(+), 17 deletions(-) create mode 100644 ChatQnA/docker_compose/intel/hpu/gaudi/compose.tei-embedding-gaudi.yaml diff --git a/ChatQnA/README.md b/ChatQnA/README.md index bd188d3671..8696837d89 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -261,6 +261,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/ docker compose -f compose.yaml -f compose.telemetry.yaml up -d ``` +To run the TEI Embedding service on Gaudi so that it can handle many concurrent user requests, the `compose.tei-embedding-gaudi.yaml` file needs to be merged with the default `compose.yaml` file. + +```bash +cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/ +docker compose -f compose.yaml -f compose.tei-embedding-gaudi.yaml up -d +``` + Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source. 
### Deploy ChatQnA on Xeon diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index d6893b559b..39a7494353 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -66,7 +66,7 @@ The ChatQnA docker images should automatically be downloaded from the `OPEA regi ✔ Network gaudi_default Created 0.1s ✔ Container tei-reranking-gaudi-server Started 0.7s ✔ Container vllm-gaudi-server Started 0.7s - ✔ Container tei-embedding-gaudi-server Started 0.3s + ✔ Container tei-embedding-server Started 0.3s ✔ Container redis-vector-db Started 0.6s ✔ Container retriever-redis-server Started 1.1s ✔ Container dataprep-redis-server Started 1.1s @@ -95,7 +95,7 @@ d560c232b120 opea/retriever:latest a1d7ca2d3787 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, [::]:8808->80/tcp tei-reranking-gaudi-server 9a9f3fd4fd4c opea/vllm-gaudi:latest "python3 -m vllm.ent…" 2 minutes ago Exited (1) 2 minutes ago vllm-gaudi-server 1ab9bbdf5182 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db -9ee0789d819e ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, [::]:8090->80/tcp tei-embedding-gaudi-server +9ee0789d819e ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, [::]:8090->80/tcp tei-embedding-server ``` ### Test the Pipeline @@ -129,7 +129,7 @@ docker compose -f compose.yaml down ✔ Container vllm-gaudi-server Removed 0.0s ✔ Container retriever-redis-server Removed 10.4s ✔ Container tei-reranking-gaudi-server Removed 2.0s - ✔ Container tei-embedding-gaudi-server Removed 1.2s + ✔ Container tei-embedding-server Removed 1.2s ✔ Container redis-vector-db 
Removed 0.4s ✔ Network gaudi_default Removed 0.4s ``` diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.tei-embedding-gaudi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.tei-embedding-gaudi.yaml new file mode 100644 index 0000000000..6da74dd9fe --- /dev/null +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.tei-embedding-gaudi.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + tei-embedding-service: + image: ghcr.io/huggingface/tei-gaudi:1.5.0 + container_name: tei-embedding-gaudi-server + ports: + - "8090:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index ef24a86fff..91938f8d1f 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -34,7 +34,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml index 84cb5f24eb..d9b3b99882 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml @@ -28,7 +28,7 @@ services: LOGFLAG: ${LOGFLAG} tei-embedding-service: image: 
ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml index 670cacceae..9d4eddc84e 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml @@ -28,7 +28,7 @@ services: LOGFLAG: ${LOGFLAG} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 5ff5e1858c..016b700447 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -66,7 +66,7 @@ services: restart: unless-stopped tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index f10a34e6a1..eaf3b5e230 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -27,7 +27,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml 
b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 946c4a8781..7ec44d0e23 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -27,7 +27,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 - container_name: tei-embedding-gaudi-server + container_name: tei-embedding-server ports: - "8090:80" volumes: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index ba18ce7ae0..8deefa3ad2 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -48,7 +48,7 @@ f810f3b4d329 opea/embedding:latest "python embed 2fa17d84605f opea/dataprep:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->5000/tcp dataprep-redis-server 69e1fb59e92c opea/retriever:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server -174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server +174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-server 05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.3.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, 
:::8001->8001/tcp redis-vector-db 88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.6 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml index 8816f4ec68..7fb4f85e1c 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml @@ -20,7 +20,7 @@ scrape_configs: - job_name: "tei-embedding" metrics_path: /metrics static_configs: - - targets: ["tei-embedding-gaudi-server:80"] + - targets: ["tei-embedding-server:80"] - job_name: "tei-reranking" metrics_path: /metrics static_configs: diff --git a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh index dabd70aacd..645753dc83 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh @@ -120,7 +120,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "[[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' sleep 1m # retrieval can't curl as expected, try to wait for more time diff --git a/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh index 10cc6d34d4..3d9e1d6936 100644 --- a/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh @@ -116,7 +116,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "[[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' sleep 1m # retrieval can't curl as expected, try to wait for more time diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index cc29b89268..791dee0314 100644 --- 
a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -113,7 +113,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "[[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' sleep 1m # retrieval can't curl as expected, try to wait for more time diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index b2e8c5709c..918583bb5c 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -100,7 +100,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "\[\[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh index ba3bef9a3a..54f586ac3e 100644 --- a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh @@ -110,7 +110,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "[[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' sleep 1m # retrieval can't curl as expected, try to wait for more time diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 5e1f57b0e3..bf2b9e9f67 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -109,7 +109,7 @@ function validate_microservices() { "${ip_address}:8090/embed" \ "[[" \ "tei-embedding" \ - "tei-embedding-gaudi-server" \ + "tei-embedding-server" \ '{"inputs":"What is Deep Learning?"}' sleep 1m # retrieval can't curl as expected, try to wait for more time