diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 5b415c646e8c6..08d4520038e33 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -51,7 +51,16 @@ static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 exit(1); \
 } \
 } while (0)
-#endif // CUDART_VERSION >= 11
+#endif // CUDART_VERSION >= 12000
+
+// No-op stand-in for __builtin_assume, compiled only when CUDART_VERSION < 11020,
+// to fix compilation issues on old CUDA versions. The argument is ignored, so no
+// optimizer hint is produced on this path. File-local and force-inlined.
+#if CUDART_VERSION < 11020
+static __device__ __forceinline__ void __builtin_assume(bool exp) {
+    (void) exp; // silence the unused-parameter warning
+}
+#endif // CUDART_VERSION < 11020
 
 #ifdef GGML_CUDA_F16
 typedef half dfloat; // dequantize float