@@ -66,6 +66,7 @@ endif()
66
66
# Optional third-party acceleration backends
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_OPENBLAS   "llama: use OpenBLAS"                OFF)
option(LLAMA_CUBLAS     "llama: use cuBLAS"                  OFF)

# Tests and examples take their default from the value of LLAMA_STANDALONE
option(LLAMA_BUILD_TESTS    "llama: build tests"    ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -109,6 +110,7 @@ if (APPLE AND LLAMA_ACCELERATE)
109
110
message (WARNING "Accelerate framework not found" )
110
111
endif ()
111
112
endif ()
113
+
112
114
if (LLAMA_OPENBLAS )
113
115
if (LLAMA_STATIC )
114
116
set (BLA_STATIC ON )
@@ -142,6 +144,30 @@ if (LLAMA_OPENBLAS)
142
144
endif ()
143
145
endif ()
144
146
147
# Optional cuBLAS backend.
# When LLAMA_CUBLAS is ON and the CUDA toolkit is located, this enables the
# CUDA language, records the ggml CUDA sources in GGML_CUDA_SOURCES, defines
# GGML_USE_CUBLAS for the directory and appends the CUDA runtime / cuBLAS
# imported targets to LLAMA_EXTRA_LIBS. If the toolkit is missing, only a
# warning is printed and configuration continues without cuBLAS.
if (LLAMA_CUBLAS)
    # Presumably required because find_package(CUDAToolkit) relies on a module
    # that first shipped with CMake 3.17 -- TODO confirm.
    cmake_minimum_required(VERSION 3.17)

    find_package(CUDAToolkit)
    if (NOT CUDAToolkit_FOUND)
        message(WARNING "cuBLAS not found")
    else()
        message(STATUS "cuBLAS found")

        enable_language(CUDA)

        # Consumed later when the ggml library target is declared
        set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)

        add_compile_definitions(GGML_USE_CUBLAS)

        # Pick static or shared CUDA libraries to match LLAMA_STATIC
        if (LLAMA_STATIC)
            list(APPEND LLAMA_EXTRA_LIBS CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
        else()
            list(APPEND LLAMA_EXTRA_LIBS CUDA::cudart CUDA::cublas CUDA::cublasLt)
        endif()
    endif()
endif()
170
+
145
171
if (LLAMA_ALL_WARNINGS )
146
172
if (NOT MSVC )
147
173
set (c_flags
@@ -153,7 +179,6 @@ if (LLAMA_ALL_WARNINGS)
153
179
-Wshadow
154
180
-Wstrict-prototypes
155
181
-Wpointer-arith
156
- -Wno-unused-function
157
182
)
158
183
set (cxx_flags
159
184
-Wall
@@ -221,21 +246,26 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
221
246
message (STATUS "x86 detected" )
222
247
if (MSVC )
223
248
# Select the MSVC /arch level from the LLAMA_AVX512 / LLAMA_AVX2 / LLAMA_AVX
# options (first match wins). Every flag and definition is wrapped in a
# COMPILE_LANGUAGE generator expression so it applies to C and C++ sources
# only -- presumably so that other enabled languages (e.g. CUDA) do not
# receive MSVC-specific options; confirm against the CUDA build.
if (LLAMA_AVX512)
    add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
    # MSVC has no compile-time flags enabling specific
    # AVX512 extensions, nor does it define the
    # macros corresponding to the extensions.
    # Do it manually.
    if (LLAMA_AVX512_VBMI)
        add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
        add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
    endif()
    if (LLAMA_AVX512_VNNI)
        add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
        add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
    endif()
elseif (LLAMA_AVX2)
    add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
elseif (LLAMA_AVX)
    add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
endif()
240
270
else ()
241
271
if (LLAMA_F16C )
@@ -272,7 +302,8 @@ endif()
272
302
273
303
# ggml object library. ${GGML_CUDA_SOURCES} expands to nothing unless the
# variable has been populated, so the CUDA files are compiled only in that case.
add_library(ggml OBJECT ggml.c ggml.h ${GGML_CUDA_SOURCES})

# Consumers of ggml get the current directory on their include path
target_include_directories(ggml PUBLIC .)
target_compile_features(ggml PUBLIC c_std_11) # don't bump
@@ -294,6 +325,14 @@ if (BUILD_SHARED_LIBS)
294
325
target_compile_definitions (llama PRIVATE LLAMA_SHARED LLAMA_BUILD )
295
326
endif ()
296
327
328
# Per-target CUDA settings, applied only when GGML_CUDA_SOURCES is non-empty.
if (GGML_CUDA_SOURCES)
    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
    # NOTE(review): CUDA_SELECT_NVCC_ARCH_FLAGS looks like the name of a legacy
    # FindCUDA helper rather than a built-in target property -- confirm that
    # something actually reads this value.
    set_target_properties(ggml PROPERTIES
        CUDA_ARCHITECTURES          OFF
        CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
    set_target_properties(llama PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
334
+
335
+
297
336
#
298
337
# programs, examples and tests
299
338
#
@@ -305,4 +344,5 @@ endif ()
305
344
306
345
# The example programs and the proof-of-concept directory are both gated on
# LLAMA_BUILD_EXAMPLES; they are added in this order.
if (LLAMA_BUILD_EXAMPLES)
    foreach (llama_subdir examples pocs)
        add_subdirectory(${llama_subdir})
    endforeach()
endif()
0 commit comments