
Commit afb5d28

flake.nix: rewrite
1. Split into separate files, one per output.
2. Added overlays so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`, so they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).
3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;`, so that dependencies are injected rather than looked up.
4. Added a description and meta information to each package. Each description notes which backend accelerates that flavor.
5. Use specific CUDA packages instead of cudatoolkit, on the advice of @SomeoneSerge.
1 parent 708e179 commit afb5d28
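
Point 2 above is the integration story: a downstream flake can layer these packages onto its own nixpkgs. A minimal sketch, assuming the input name `llama-cpp-flake` and that this flake exposes `.devops/overlay.nix` as `overlays.default` (both names are hypothetical here):

{
  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
    # Hypothetical input name for this repository's flake.
    llama-cpp-flake.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp-flake }:
    let
      pkgs = import nixpkgs {
        system = "x86_64-linux";
        # Assumes the flake exports .devops/overlay.nix as overlays.default.
        overlays = [ llama-cpp-flake.overlays.default ];
      };
    in {
      # Any of the overlay's four flavors works here:
      # llama-cpp, llama-cpp-opencl, llama-cpp-cuda, llama-cpp-rocm.
      packages.x86_64-linux.default = pkgs.llama-cpp;
    };
}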

File tree

6 files changed: +238, -171 lines


.devops/apps.nix

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
names: pkgs:

let
  default = builtins.elemAt names 0;
  mkApp = name: {
    ${name} = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/${name}";
    };
  };
  result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names;
in

result // { default = result.${default}; }
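
To illustrate the fold: applied to a hypothetical name list, the helper builds one flake app per binary and aliases the first entry as `default` (store paths abbreviated):

# (import ./.devops/apps.nix) [ "llama" "quantize" ] pkgs  evaluates to roughly:
{
  default = { type = "app"; program = "/nix/store/…-llama.cpp/bin/llama"; };
  llama = { type = "app"; program = "/nix/store/…-llama.cpp/bin/llama"; };
  quantize = { type = "app"; program = "/nix/store/…-llama.cpp/bin/quantize"; };
}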

.devops/devshells.nix

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
pkgs:

let
  llama-python = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.torchWithoutCuda
    ps.transformers
  ]);
in

{
  default = pkgs.mkShell {
    name = "default";
    description = "contains numpy and sentencepiece";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python ];
  };

  extra = pkgs.mkShell {
    name = "extra";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python-extra ];
  };
}
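
A sketch of how the flake's per-system outputs might wire this file in (the exact attribute path is an assumption; `pkgs` must already carry the overlay so that `pkgs.llama-cpp` exists):

# Hypothetical wiring inside flake.nix, for one system:
devShells.x86_64-linux = import ./.devops/devshells.nix pkgs;

`nix develop` then enters the default shell, and `nix develop .#extra` the one with the heavy ML Python runtime.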

.devops/overlay.nix

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
final: prev:

let
  inherit (prev.stdenv) isAarch32 isAarch64 isDarwin;

  darwinSpecific = if isAarch64 then {
    inherit (prev.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit;
  } else {
    inherit (prev.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo;
  };

  osSpecific = if isDarwin then darwinSpecific else {};
in

{
  llama-cpp = prev.callPackage ./package.nix osSpecific;
  llama-cpp-opencl = prev.callPackage ./package.nix (osSpecific // { useOpenCL = true; });
  llama-cpp-cuda = prev.callPackage ./package.nix (osSpecific // { useCuda = true; });
  llama-cpp-rocm = prev.callPackage ./package.nix (osSpecific // { useRocm = true; });
}
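
The overlay is also usable outside flakes. A minimal sketch against a plain nixpkgs import:

let
  pkgs = import <nixpkgs> {
    overlays = [ (import ./.devops/overlay.nix) ];
  };
in
# On aarch64-darwin, osSpecific injects Accelerate and MetalKit into
# package.nix; on Linux the framework arguments stay at their null defaults.
pkgs.llama-cpp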

.devops/package.nix

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
{ lib
, config
, stdenv
, cmake
, ninja
, pkg-config
, git
, python3
, openmpi
, openblas
, cudaPackages
, rocmPackages
, clblast
, Accelerate ? null
, MetalKit ? null
, CoreVideo ? null
, CoreGraphics ? null
, useOpenCL ? false
, useCuda ? config.cudaSupport
, useRocm ? config.rocmSupport
}@inputs:

let
  inherit (lib) cmakeBool cmakeFeature optional optionals versionOlder;
  isDefault = !useOpenCL && !useCuda && !useRocm;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Give a little description difference between the flavors.
  descriptionSuffix = if useOpenCL then
    " (OpenCL accelerated)"
  else if useCuda then
    " (CUDA accelerated)"
  else if useRocm then
    " (ROCm accelerated)"
  else if (MetalKit != null) then
    " (MetalKit accelerated)"
  else "";

  # TODO: package the Python in this repository in a Nix-like way.
  llama-python = python3.withPackages (ps: [ ps.numpy ps.sentencepiece ]);

  # See ./overlay.nix for where these dependencies are passed in.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [ clr hipblas rocblas ];
in

effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../.;
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };

  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';

  nativeBuildInputs = [ cmake ninja pkg-config git ]
    ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

  buildInputs = [ openmpi ]
    ++ optional useOpenCL clblast
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs;

  cmakeFlags = [
    (cmakeBool "LLAMA_NATIVE" true)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optional useOpenCL (cmakeBool "LLAMA_CLBLAST" true)
  ++ optional useCuda (cmakeBool "LLAMA_CUBLAS" true)
  ++ optionals useRocm [
    (cmakeBool "LLAMA_HIPBLAS" true)
    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

    # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
    # and select the line that matches the current nixpkgs version of rocBLAS.
    # Should likely use `rocmPackages.clr.gpuTargets`.
    "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
  ]
  ++ optionals isDefault (if (MetalKit != null) then [
    "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
    "-DLLAMA_METAL=ON"
  ] else [
    "-DLLAMA_BLAS=ON"
    "-DLLAMA_BLAS_VENDOR=OpenBLAS"
  ]);

  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';
}
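
Because the derivation is instantiated with `callPackage`, any of its arguments can be swapped after the fact via `.override` — this is the dependency-injection payoff from the commit message. A minimal sketch, assuming `pkgs` carries the overlay (`cudaPackages_12` is the versioned CUDA set from nixpkgs):

{
  # Force the OpenCL path from the plain attribute:
  withOpenCL = pkgs.llama-cpp.override { useOpenCL = true; };

  # Or pin a specific CUDA package set:
  withCuda12 = pkgs.llama-cpp-cuda.override { cudaPackages = pkgs.cudaPackages_12; };
}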

flake.lock

Lines changed: 3 additions & 37 deletions
Some generated files are not rendered by default.
