huggingface · OlivierDehaene · Mar 29, 2023 · Feb 28, 2023 · Feb 28, 2023 · Feb 28, 2023
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -83,4 +83,58 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
-          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
+          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
+
+  # Builds the `sagemaker` Dockerfile target and pushes it to the internal
+  # registry as .../text-generation-inference/sagemaker. Depends on the
+  # build-and-push-image job.
+  # NOTE(review): the release pins on the docker/setup-buildx, login and
+  # metadata actions were reconstructed - confirm they match the pins used
+  # by the build-and-push-image job.
+  build-and-push-sagemaker-image:
+    needs:
+      - build-and-push-image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v2.0.0
+        with:
+          install: true
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+      - name: Login to internal Container Registry
+        uses: docker/login-action@v2.1.0
+        with:
+          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
+          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
+          registry: registry.internal.huggingface.tech
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4.3.0
+        with:
+          flavor: |
+            latest=auto
+          images: |
+            registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker
+          # GITHUB_SHA_SHORT is presumably exported by the slug action step
+          # above - TODO confirm.
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
+            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: Dockerfile
+          # Pull requests build the image but do not push it.
+          push: ${{ github.event_name != 'pull_request' }}
+          platforms: 'linux/amd64'
+          target: sagemaker
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Read-only use of the shared registry cache: no cache-to is set.
+          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
diff --git a/Dockerfile b/Dockerfile
@@ -27,7 +27,7 @@ COPY router router
 COPY launcher launcher
 RUN cargo build --release
 
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base
 
 ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8 \
@@ -76,5 +76,16 @@ COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bi
 # Install launcher
 COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
 
+# AWS Sagemaker compatible image
+FROM base as sagemaker
+
+COPY sagemaker-entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
+
+# Original image
+FROM base
+
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
diff --git a/router/src/server.rs b/router/src/server.rs
@@ -529,11 +529,19 @@ pub async fn run(
     // Create router
     let app = Router::new()
         .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
+        // Base routes
         .route("/", post(compat_generate))
         .route("/generate", post(generate))
         .route("/generate_stream", post(generate_stream))
-        .route("/", get(health))
+        // AWS Sagemaker route
+        .route("/invocations", post(compat_generate))
+        // Base Health route
         .route("/health", get(health))
+        // Inference API health route
+        .route("/", get(health))
+        // AWS Sagemaker health route
+        .route("/ping", get(health))
+        // Prometheus metrics route
         .route("/metrics", get(metrics))
         .layer(Extension(compat_return_full_text))
         .layer(Extension(infer))

diff --git a/sagemaker-entrypoint.sh b/sagemaker-entrypoint.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Entrypoint for the `sagemaker` image target: translates the HF_*/SM_*
+# environment variables into the variable names exported for
+# text-generation-launcher, then starts the launcher on port 8080.
+# Positional arguments given to this script are not forwarded.
+
+# A model id is mandatory; fail fast with a message on stderr.
+if [[ -z "${HF_MODEL_ID}" ]]; then
+  echo "HF_MODEL_ID must be set" >&2
+  exit 1
+fi
+# Forward the model id like the other settings below; previously it was
+# validated but never handed on. Assumes the launcher reads MODEL_ID from
+# the environment, as it is assumed to for REVISION/NUM_SHARD/QUANTIZE.
+export MODEL_ID="${HF_MODEL_ID}"
+
+# Optional settings are only exported when provided.
+if [[ -n "${HF_MODEL_REVISION}" ]]; then
+  export REVISION="${HF_MODEL_REVISION}"
+fi
+
+if [[ -n "${SM_NUM_GPUS}" ]]; then
+  export NUM_SHARD="${SM_NUM_GPUS}"
+fi
+
+if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then
+  export QUANTIZE="${HF_MODEL_QUANTIZE}"
+fi
+
+# exec replaces this shell with the launcher so signals sent to the
+# container's main process reach the launcher directly.
+exec text-generation-launcher --port 8080