diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 72b334b44d24..70adcafcfb18 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -89,7 +89,7 @@ jobs:
             context: "./backend"
           - build-type: 'l4t'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-l4t-diffusers'
@@ -187,7 +187,7 @@ jobs:
           # CUDA 12 builds
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
@@ -199,7 +199,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
@@ -211,7 +211,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-vllm'
@@ -223,7 +223,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-transformers'
@@ -235,7 +235,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
@@ -248,7 +248,7 @@ jobs:
           # CUDA 12 additional backends
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
@@ -260,7 +260,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
@@ -272,7 +272,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-coqui'
@@ -284,7 +284,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-bark'
@@ -296,7 +296,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
@@ -578,7 +578,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             skip-drivers: 'true'
             tag-latest: 'auto'
@@ -615,7 +615,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
@@ -675,7 +675,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             skip-drivers: 'true'
             tag-latest: 'auto'
@@ -700,7 +700,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-whisper'
@@ -760,7 +760,7 @@ jobs:
             context: "./"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             skip-drivers: 'true'
             tag-latest: 'auto'
@@ -836,7 +836,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
@@ -872,7 +872,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             skip-drivers: 'true'
             tag-latest: 'auto'
@@ -897,7 +897,7 @@ jobs:
             context: "./backend"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 6bcb995ba59a..a2410b22827e 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -36,7 +36,7 @@ jobs:
         include:
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-gpu-nvidia-cuda-12'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 5b12ba07033f..3864930d03ed 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -91,7 +91,7 @@ jobs:
             aio: "-aio-gpu-nvidia-cuda-11"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12'
@@ -144,7 +144,7 @@ jobs:
         include:
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "8"
+            cuda-minor-version: "0"
             platforms: 'linux/arm64'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64'
diff --git a/Dockerfile b/Dockerfile
index 9bf6964c3f60..1e356014dec3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=8
+ARG CUDA_MINOR_VERSION=0
 ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
diff --git a/Makefile b/Makefile
index ec94ef1d4365..24502b57be9e 100644
--- a/Makefile
+++ b/Makefile
@@ -170,7 +170,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests .
+	docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
diff --git a/backend/README.md b/backend/README.md
index 8d68d613ec4d..87fd9f28f89c 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \
   --build-arg BACKEND=transformers \
   --build-arg BUILD_TYPE=cublas12 \
   --build-arg CUDA_MAJOR_VERSION=12 \
-  --build-arg CUDA_MINOR_VERSION=8 \
+  --build-arg CUDA_MINOR_VERSION=0 \
   -t localai-backend-transformers .
 
 # Build Go backend