Skip to content

flake.nix: use flake-parts #4632

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
22 changes: 22 additions & 0 deletions .devops/nix/apps.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
perSystem =
{ config, lib, ... }:
{
apps =
let
inherit (config.packages) default;
binaries = [
"llama"
"llama-embedding"
"llama-server"
"quantize"
"train-text-from-scratch"
];
mkApp = name: {
type = "app";
program = "${default}/bin/${name}";
};
in
lib.genAttrs binaries mkApp;
};
}
13 changes: 13 additions & 0 deletions .devops/nix/devshells.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
perSystem =
{ config, lib, ... }:
{
devShells =
lib.concatMapAttrs
(name: package: {
${name} = package.passthru.shell;
${name + "-extra"} = package.passthru.shell-extra;
})
config.packages;
};
}
35 changes: 35 additions & 0 deletions .devops/nix/nixpkgs-instances.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{ inputs, ... }:
{
# The _module.args definitions are passed on to modules as arguments. E.g.
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
# `_module.args.pkgs` (defined in this case by flake-parts).
perSystem =
{ system, ... }:
{
_module.args = {
pkgsCuda = import inputs.nixpkgs {
inherit system;
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
# and ucx are built with CUDA support)
config.cudaSupport = true;
config.allowUnfreePredicate =
p:
builtins.all
(
license:
license.free
|| builtins.elem license.shortName [
"CUDA EULA"
"cuDNN EULA"
]
)
(p.meta.licenses or [ p.meta.license ]);
};
# Ensure dependencies use ROCm consistently
pkgsRocm = import inputs.nixpkgs {
inherit system;
config.rocmSupport = true;
};
};
};
}
189 changes: 189 additions & 0 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
{
lib,
config,
stdenv,
mkShell,
cmake,
ninja,
pkg-config,
git,
python3,
mpi,
openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations
cudaPackages,
darwin,
rocmPackages,
clblast,
useBlas ? builtins.all (x: !x) [
useCuda
useMetalKit
useOpenCL
useRocm
],
useCuda ? config.cudaSupport,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
useOpenCL ? false,
useRocm ? config.rocmSupport,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:

let
inherit (lib)
cmakeBool
cmakeFeature
optionals
strings
versionOlder
;

# It's necessary to consistently use backendStdenv when building with CUDA support,
# otherwise we get libstdc++ errors downstream.
stdenv = throw "Use effectiveStdenv instead";
effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

suffices =
lib.optionals useOpenCL [ "OpenCL" ]
++ lib.optionals useCuda [ "CUDA" ]
++ lib.optionals useRocm [ "ROCm" ]
++ lib.optionals useMetalKit [ "MetalKit" ]
++ lib.optionals useBlas [ "BLAS" ];

pnameSuffix =
strings.optionalString (suffices != [ ])
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
descriptionSuffix =
strings.optionalString (suffices != [ ])
", accelerated with ${strings.concatStringsSep ", " suffices}";

# TODO: package the Python in this repository in a Nix-like way.
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
# https://peps.python.org/pep-0517/
llama-python = python3.withPackages (
ps: [
ps.numpy
ps.sentencepiece
]
);

# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
llama-python-extra = python3.withPackages (
ps: [
ps.numpy
ps.sentencepiece
ps.torchWithoutCuda
ps.transformers
]
);

# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
# separately
darwinBuildInputs =
with darwin.apple_sdk.frameworks;
[
Accelerate
CoreVideo
CoreGraphics
]
++ optionals useMetalKit [ MetalKit ];

cudaBuildInputs = with cudaPackages; [
cuda_cccl.dev # <nv/target>
cuda_cudart
libcublas
];

rocmBuildInputs = with rocmPackages; [
clr
hipblas
rocblas
];
in

effectiveStdenv.mkDerivation (
finalAttrs: {
pname = "llama-cpp${pnameSuffix}";
version = llamaVersion;

src = ../../.;
meta = {
description = "LLaMA model in pure C/C++${descriptionSuffix}";
mainProgram = "llama";
};

postPatch = ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

# TODO: Package up each Python script or service appropriately.
# If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
# we could make those *.py into setuptools' entrypoints
substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
'';

nativeBuildInputs = [
cmake
ninja
pkg-config
git
] ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

buildInputs =
[ mpi ]
++ optionals useOpenCL [ clblast ]
++ optionals useCuda cudaBuildInputs
++ optionals useRocm rocmBuildInputs
++ optionals effectiveStdenv.isDarwin darwinBuildInputs;

cmakeFlags =
[
(cmakeBool "LLAMA_NATIVE" true)
(cmakeBool "LLAMA_BUILD_SERVER" true)
(cmakeBool "BUILD_SHARED_LIBS" true)
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
(cmakeBool "LLAMA_METAL" useMetalKit)
(cmakeBool "LLAMA_BLAS" useBlas)
]
++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ]
++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ]
++ optionals useRocm [
(cmakeBool "LLAMA_HIPBLAS" true)
(cmakeFeature "CMAKE_C_COMPILER" "hipcc")
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
# Should likely use `rocmPackages.clr.gpuTargets`.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
]
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
postInstall = ''
mv $out/bin/main $out/bin/llama
mv $out/bin/server $out/bin/llama-server
mkdir -p $out/include
cp $src/llama.h $out/include/
'';

# Define the shells here, but don't add in the inputsFrom to avoid recursion.
passthru = {
shell = mkShell {
name = "shell-${finalAttrs.finalPackage.name}";
description = "contains numpy and sentencepiece";
buildInputs = [ llama-python ];
inputsFrom = [ finalAttrs.finalPackage ];
};

shell-extra = mkShell {
name = "shell-extra-${finalAttrs.finalPackage.name}";
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
buildInputs = [ llama-python-extra ];
inputsFrom = [ finalAttrs.finalPackage ];
};
};
}
)
12 changes: 12 additions & 0 deletions .devops/nix/scope.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
lib,
newScope,
llamaVersion ? "0.0.0",
}:

lib.makeScope newScope (
self: {
inherit llamaVersion;
llama-cpp = self.callPackage ./package.nix { };
}
)
23 changes: 23 additions & 0 deletions .github/workflows/nix-flakestry.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Make the flake discoverable on https://flakestry.dev
name: "Publish a flake to flakestry"
on:
push:
tags:
- "v?[0-9]+.[0-9]+.[0-9]+"
- "v?[0-9]+.[0-9]+"
workflow_dispatch:
inputs:
tag:
description: "The existing tag to publish"
type: "string"
required: true
jobs:
publish-flake:
runs-on: ubuntu-latest
permissions:
id-token: "write"
contents: "read"
steps:
- uses: flakestry/flakestry-publish@main
with:
version: "${{ inputs.tag || github.ref_name }}"
55 changes: 29 additions & 26 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading