
Commit 119e61d

deps(llama.cpp): update build variables to follow upstream
Update build recipes with ggml-org/llama.cpp#8006

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 4fc889b commit 119e61d
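
Context: upstream llama.cpp renamed its LLAMA_* CMake build options to GGML_* equivalents in ggml-org/llama.cpp#8006, so every recipe in this repository that passes CMAKE_ARGS or exports build variables is updated to the new names. A rough before/after sketch for a plain CPU rebuild, using the flags touched in the FAQ and build docs below (the `make build` target is LocalAI's existing entrypoint):

```
# Old option names (pre-rename):
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build

# Renamed GGML_* options after this commit:
CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make build
```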

9 files changed, +31 -31 lines changed


.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@ jobs:
export CPLUS_INCLUDE_PATH=/usr/local/include
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
+BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/[email protected]

Makefile

Lines changed: 12 additions & 12 deletions
@@ -80,8 +80,8 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
-CMAKE_ARGS+=-DLLAMA_METAL=OFF
-export LLAMA_NO_ACCELERATE=1
+CMAKE_ARGS+=-DGGML_METAL=OFF
+export GGML_NO_ACCELERATE=1
endif

ifeq ($(BUILD_TYPE),metal)
@@ -98,13 +98,13 @@ endif

ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
-export LLAMA_CUBLAS=1
+export GGML_CUDA=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif

ifeq ($(BUILD_TYPE),vulkan)
-CMAKE_ARGS+=-DLLAMA_VULKAN=1
+CMAKE_ARGS+=-DGGML_VULKAN=1
endif

ifeq ($(BUILD_TYPE),hipblas)
@@ -118,13 +118,13 @@ ifeq ($(BUILD_TYPE),hipblas)
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif

ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-export LLAMA_METAL=1
+export GGML_METAL=1
export WHISPER_METAL=1
endif

@@ -354,7 +354,7 @@ else
endif

dist-cross-linux-arm64:
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -711,21 +711,21 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2

backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx

backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
@@ -736,7 +736,7 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
@@ -764,7 +764,7 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc

backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
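
The top-level Makefile keeps the same BUILD_TYPE switch; only the variables it exports and the CMake flags it passes change. A minimal usage sketch, assuming the documented `make build` entrypoint (the targets themselves are unchanged by this commit):

```
# CUDA build: the Makefile now exports GGML_CUDA=1 (previously LLAMA_CUBLAS=1)
BUILD_TYPE=cublas make build

# Vulkan build: now configured with -DGGML_VULKAN=1 (previously -DLLAMA_VULKAN=1)
BUILD_TYPE=vulkan make build
```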

backend/cpp/llama/Makefile

Lines changed: 11 additions & 11 deletions
@@ -6,35 +6,35 @@ BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server

-# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
+# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
-CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
-# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+CMAKE_ARGS+=-DGGML_CUDA=ON
+# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
-CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
-CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
-CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
-# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
+CMAKE_ARGS+=-DGGML_HIPBLAS=ON
+# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
-CMAKE_ARGS+=-DLLAMA_METAL=OFF
+CMAKE_ARGS+=-DGGML_METAL=OFF
else
TARGET+=--target ggml-metal
endif
endif

ifeq ($(BUILD_TYPE),sycl_f16)
-CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif

ifeq ($(BUILD_TYPE),sycl_f32)
-CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif

llama.cpp:
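
For this C++ backend, the CMAKE_ARGS selected per BUILD_TYPE end up on the CMake command line for the grpc-server target. A rough sketch of the resulting configure/build steps for a SYCL FP16 build with the renamed options (the source and build directories are illustrative, not taken from this Makefile; the real recipe drives CMake itself):

```
# Illustrative only: approximate steps for BUILD_TYPE=sycl_f16 after the rename.
source /opt/intel/oneapi/setvars.sh   # ONEAPI_VARS in the Makefile above
cmake -S llama.cpp -B build \
  -DGGML_SYCL=ON -DGGML_SYCL_F16=ON \
  -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --target grpc-server   # TARGET?=--target grpc-server
```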

docs/content/docs/advanced/fine-tuning.md

Lines changed: 1 addition & 1 deletion
@@ -118,7 +118,7 @@ And we convert it to the gguf format that LocalAI can consume:

# Convert to gguf
git clone https://github.com/ggerganov/llama.cpp.git
-pushd llama.cpp && make LLAMA_CUBLAS=1 && popd
+pushd llama.cpp && make GGML_CUDA=1 && popd

# We need to convert the pytorch model into ggml for quantization
# It crates 'ggml-model-f16.bin' in the 'merged' directory.

docs/content/docs/faq.md

Lines changed: 1 addition & 1 deletion
@@ -55,4 +55,4 @@ This typically happens when your prompt exceeds the context size. Try to reduce

### I'm getting a 'SIGILL' error, what's wrong?

-Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`
+Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make build`

docs/content/docs/getting-started/build.md

Lines changed: 2 additions & 2 deletions
@@ -101,14 +101,14 @@ Here is the list of the variables available that can be used to customize the bu
LocalAI uses different backends based on ggml and llama.cpp to run models. If your CPU doesn't support common instruction sets, you can disable them during build:

```
-CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_AVX=OFF -DLLAMA_FMA=OFF" make build
+CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" make build
```

To have effect on the container image, you need to set `REBUILD=true`:

```
docker run quay.io/go-skynet/localai
-docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_AVX=OFF -DLLAMA_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
+docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
```

{{% /alert %}}

entrypoint.sh

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ else
echo "@@@@@"
echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
-echo 'CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF"'
+echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
echo "see the documentation at: https://localai.io/basics/build/index.html"
echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
echo "@@@@@"

examples/e2e-fine-tuning/README.md

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ And we convert it to the gguf format that LocalAI can consume:

# Convert to gguf
git clone https://github.com/ggerganov/llama.cpp.git
-pushd llama.cpp && make LLAMA_CUBLAS=1 && popd
+pushd llama.cpp && make GGML_CUDA=1 && popd

# We need to convert the pytorch model into ggml for quantization
# It crates 'ggml-model-f16.bin' in the 'merged' directory.

examples/e2e-fine-tuning/notebook.ipynb

Lines changed: 1 addition & 1 deletion
@@ -1600,7 +1600,7 @@
"source": [
"\n",
"!git clone https://github.com/ggerganov/llama.cpp.git\n",
-"!cd llama.cpp && make LLAMA_CUBLAS=1\n",
+"!cd llama.cpp && make GGML_CUDA=1\n",
"\n"
]
},
