diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 72b334b44d24..70adcafcfb18 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -89,7 +89,7 @@ jobs: context: "./backend" - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-l4t-diffusers' @@ -187,7 +187,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' @@ -199,7 +199,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -211,7 +211,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -223,7 +223,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -235,7 +235,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -248,7 +248,7 @@ jobs: # CUDA 12 additional backends - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -260,7 +260,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -272,7 +272,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -284,7 +284,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -296,7 +296,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -578,7 +578,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -615,7 +615,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -675,7 +675,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -700,7 +700,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -760,7 +760,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -836,7 +836,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -872,7 +872,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -897,7 +897,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 6bcb995ba59a..a2410b22827e 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -36,7 +36,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 5b12ba07033f..3864930d03ed 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -91,7 +91,7 @@ jobs: aio: "-aio-gpu-nvidia-cuda-11" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -144,7 +144,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "8" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/Dockerfile b/Dockerfile index 9bf6964c3f60..1e356014dec3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=12 -ARG CUDA_MINOR_VERSION=8 +ARG CUDA_MINOR_VERSION=0 ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT diff --git a/Makefile b/Makefile index ec94ef1d4365..24502b57be9e 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests . + docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) diff --git a/backend/README.md b/backend/README.md index 8d68d613ec4d..87fd9f28f89c 100644 --- a/backend/README.md +++ b/backend/README.md @@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \ --build-arg BACKEND=transformers \ --build-arg BUILD_TYPE=cublas12 \ --build-arg CUDA_MAJOR_VERSION=12 \ - --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg CUDA_MINOR_VERSION=0 \ -t localai-backend-transformers . # Build Go backend