94 changes: 93 additions & 1 deletion .github/workflows/compile.yml
@@ -76,6 +76,72 @@ jobs:
name: llava-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error

compile-musl:
name: Compile (musl)
strategy:
fail-fast: true
matrix:
include:
- build: 'noavx'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-20.04
container:
image: alpine:latest
steps:
- name: Install dependencies
run: |
apk update && apk add --no-cache \
build-base \
cmake \
git \
linux-headers \
g++
- uses: actions/checkout@v4
with:
repository: ggerganov/llama.cpp
fetch-depth: 0
ref: '${{ github.event.inputs.llama_cpp_commit }}'
- name: Build
id: cmake_build_musl
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j $(nproc)
ls -R
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllama.so
name: llama-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml.so
name: ggml-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-base.so
name: ggml-base-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-cpu.so
name: ggml-cpu-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllava_shared.so
name: llava-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error

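The four matrix flavours above mirror the glibc Linux job: the same sources are compiled four times with different x86 instruction-set ceilings. As a rough illustration of why all four are shipped, a host can probe CPU support at run time and load the strongest flavour available. The sketch below is not LLamaSharp's actual loader (that logic lives in NativeLibraryConfig/NativeLibraryUtils); it just shows the selection idea using the .NET intrinsics feature flags:

```csharp
// Sketch only: pick a native build flavour from CPU capabilities.
using System;
using System.Runtime.Intrinsics.X86;

static string PickNativeFlavour()
{
    if (Avx512F.IsSupported) return "avx512"; // AVX-512 flags require .NET 8+
    if (Avx2.IsSupported)    return "avx2";
    if (Avx.IsSupported)     return "avx";
    return "noavx";
}

Console.WriteLine($"Selected flavour: {PickNativeFlavour()}");
```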
compile-windows:
name: Compile (Windows)
strategy:
@@ -519,6 +585,7 @@ jobs:
if: ${{ always() }}
needs: [
"compile-linux",
"compile-musl",
"compile-windows",
"compile-vulkan",
"compile-cublas",
@@ -534,7 +601,7 @@ jobs:
- name: Rearrange Files
run: |
# Make all directories at once
mkdir --parents deps/{noavx,avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -561,6 +628,31 @@ jobs:
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

# Musl
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-noavx-x64.so/libggml-cpu.so deps/musl-noavx/libggml-cpu.so
cp artifacts/llama-bin-musl-noavx-x64.so/libllama.so deps/musl-noavx/libllama.so
cp artifacts/llava-bin-musl-noavx-x64.so/libllava_shared.so deps/musl-noavx/libllava_shared.so

cp artifacts/ggml-bin-musl-avx-x64.so/libggml.so deps/musl-avx/libggml.so
cp artifacts/ggml-base-bin-musl-avx-x64.so/libggml-base.so deps/musl-avx/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx-x64.so/libggml-cpu.so deps/musl-avx/libggml-cpu.so
cp artifacts/llama-bin-musl-avx-x64.so/libllama.so deps/musl-avx/libllama.so
cp artifacts/llava-bin-musl-avx-x64.so/libllava_shared.so deps/musl-avx/libllava_shared.so

cp artifacts/ggml-bin-musl-avx2-x64.so/libggml.so deps/musl-avx2/libggml.so
cp artifacts/ggml-base-bin-musl-avx2-x64.so/libggml-base.so deps/musl-avx2/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx2-x64.so/libggml-cpu.so deps/musl-avx2/libggml-cpu.so
cp artifacts/llama-bin-musl-avx2-x64.so/libllama.so deps/musl-avx2/libllama.so
cp artifacts/llava-bin-musl-avx2-x64.so/libllava_shared.so deps/musl-avx2/libllava_shared.so

cp artifacts/ggml-bin-musl-avx512-x64.so/libggml.so deps/musl-avx512/libggml.so
cp artifacts/ggml-base-bin-musl-avx512-x64.so/libggml-base.so deps/musl-avx512/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx512-x64.so/libggml-cpu.so deps/musl-avx512/libggml-cpu.so
cp artifacts/llama-bin-musl-avx512-x64.so/libllama.so deps/musl-avx512/libllama.so
cp artifacts/llava-bin-musl-avx512-x64.so/libllava_shared.so deps/musl-avx512/libllava_shared.so

# Windows
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/noavx/ggml.dll
cp artifacts/ggml-base-bin-win-noavx-x64.dll/ggml-base.dll deps/noavx/ggml-base.dll
3 changes: 2 additions & 1 deletion LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -33,9 +33,10 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
{
ContextSize = config.ContextSize,
GpuLayerCount = config.GpuLayerCount ?? 20,
Embeddings = true,

PoolingType = LLamaPoolingType.Mean,
};

_weights = LLamaWeights.LoadFromFile(@params);
_embedder = new LLamaEmbedder(_weights, @params);
_ownsWeights = true;
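The swap above replaces the obsolete `Embeddings = true` flag with an explicit pooling mode. A hedged usage sketch of the same configuration: the model path is a placeholder, and the exact `GetEmbeddings` signature and return type vary between LLamaSharp releases.

```csharp
using LLama;
using LLama.Common;
using LLama.Native;

// Placeholder path; any embedding-capable GGUF model works here.
var @params = new ModelParams("path/to/model.gguf")
{
    ContextSize = 2048,
    PoolingType = LLamaPoolingType.Mean, // mean pooling replaces Embeddings = true
};

using var weights = LLamaWeights.LoadFromFile(@params);
using var embedder = new LLamaEmbedder(weights, @params);

// Recent releases return one pooled vector per input sequence;
// the exact return type differs across versions.
var embeddings = await embedder.GetEmbeddings("Hello, world");
```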
2 changes: 1 addition & 1 deletion LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
@@ -22,7 +22,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
_testOutputHelper = testOutputHelper;

_infParams = new() { AntiPrompts = ["\n\n"] };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512 };

testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
}
14 changes: 14 additions & 0 deletions LLama.Unittest/LLamaContextTests.cs
@@ -14,6 +14,10 @@ public LLamaContextTests()
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
BatchSize = 8,
UBatchSize = 8,
SeqMax = 1,
VocabOnly = false,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_weights = LLamaWeights.LoadFromFile(@params);
@@ -84,6 +88,11 @@ public void TokenizeEmpty()
[Fact]
public void SaveLoadState()
{
// Make sure there's something in the context worth saving
var batch = new LLamaBatch();
batch.Add(17, 0, LLamaSeqId.Zero, true);
_context.Decode(batch);

using var state1 = _context.GetState();

var stream = new MemoryStream();
@@ -99,6 +108,11 @@ public void SaveLoadState()
[Fact]
public async Task SaveLoadStateAsync()
{
// Make sure there's something in the context worth saving
var batch = new LLamaBatch();
batch.Add(17, 0, LLamaSeqId.Zero, true);
_context.Decode(batch);

using var state1 = _context.GetState();

var stream = new MemoryStream();
2 changes: 1 addition & 1 deletion LLama/Batched/Conversation.cs
@@ -128,7 +128,7 @@ public Conversation Fork()
_forked = true;

// Assign tokens to the new sequence
NativeApi.llama_kv_cache_seq_cp(Executor.Context.NativeHandle, ConversationId, c.ConversationId, 0, _end);
Executor.Context.NativeHandle.KvCacheSequenceCopy(ConversationId, c.ConversationId, 0, _end);

return c;
}
4 changes: 2 additions & 2 deletions LLama/LLamaExecutorBase.cs
@@ -193,8 +193,8 @@ protected virtual void HandleRunOutOfContext(int tokensToKeep)
int n_left = _pastTokensCount - tokensToKeep;
int n_discard = n_left / 2;

NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);

_pastTokensCount -= n_discard;
// stop saving session if we run out of context
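The renamed `llama_kv_self_seq_rm`/`llama_kv_self_seq_add` pair implements the same context shift as before; only the native entry points changed. A worked example of the arithmetic, as a standalone sketch rather than library code:

```csharp
// Standalone sketch of the shift above.
int pastTokensCount = 100; // _pastTokensCount
int tokensToKeep    = 20;  // e.g. the prompt prefix to preserve

int n_left    = pastTokensCount - tokensToKeep; // 80 tokens beyond the keep point
int n_discard = n_left / 2;                     // 40 tokens to drop

// llama_kv_self_seq_rm(ctx, seq, 20, 60):        cells [20, 60) are evicted.
// llama_kv_self_seq_add(ctx, seq, 60, 100, -40): cells [60, 100) slide back to [20, 60).
pastTokensCount -= n_discard;

System.Console.WriteLine(
    $"kept {tokensToKeep}, discarded {n_discard}, cache now holds {pastTokensCount} tokens");
```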
70 changes: 70 additions & 0 deletions LLama/LLamaSharp.Runtime.targets
@@ -200,6 +200,7 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-x64/native/avx512/libggml-cpu.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
@@ -253,6 +254,75 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-x64/native/vulkan/libggml-vulkan.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml-cpu.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libggml-base.dylib">
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
@@ -56,7 +56,7 @@
</ItemGroup>

<PropertyGroup>
<BinaryReleaseId>5783575c9d99</BinaryReleaseId>
<BinaryReleaseId>be7c3034108473be</BinaryReleaseId>
</PropertyGroup>

<PropertyGroup>
4 changes: 2 additions & 2 deletions LLama/LLamaStatelessExecutor.cs
@@ -155,8 +155,8 @@ public async IAsyncEnumerable<string> InferAsync(string prompt, IInferenceParams
var n_left = n_past - tokensKeep;
var n_discard = n_left / 2;

NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);

n_past -= n_discard;
}
10 changes: 10 additions & 0 deletions LLama/Native/LLamaKvCache.cs
@@ -0,0 +1,10 @@
namespace LLama.Native;

/// <summary>
/// C# representation of llama_kv_cache
/// </summary>
/// <remarks>llama_kv_cache</remarks>
internal struct LLamaKvCacheNative
{

}
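The empty struct stands in for an opaque native type: C# code never constructs one, it only passes pointers to it across the interop boundary. A purely hypothetical illustration of that pattern (`llama_kv_cache_example` is a made-up export name for the sketch, not part of the real llama.cpp API):

```csharp
using System;
using System.Runtime.InteropServices;

internal static unsafe class KvCacheSketch
{
    // Made-up entry point, for illustration only: opaque structs such as
    // LLamaKvCacheNative are always handled via pointer, never by value.
    [DllImport("llama", EntryPoint = "llama_kv_cache_example",
               CallingConvention = CallingConvention.Cdecl)]
    public static extern LLamaKvCacheNative* GetKvCache(IntPtr ctx);
}
```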
5 changes: 4 additions & 1 deletion LLama/Native/LLamaVocabPreType.cs
@@ -4,6 +4,7 @@ namespace LLama.Native;
///
/// </summary>
/// <remarks>llama_vocab_pre_type</remarks>
// ReSharper disable InconsistentNaming
internal enum LLamaVocabPreType
{
Default = 0,
@@ -36,4 +37,6 @@ internal enum LLamaVocabPreType
CHAMELEON = 26,
MINERVA = 27,
DEEPSEEK3_LLM = 28,
}
GPT4O = 29,
}
// ReSharper restore InconsistentNaming
20 changes: 16 additions & 4 deletions LLama/Native/Load/NativeLibraryUtils.cs
@@ -218,10 +218,22 @@ public static void GetPlatformPathParts(OSPlatform platform, out string os, out

if (platform == OSPlatform.Linux)
{
os = "linux-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
if(RuntimeInformation.RuntimeIdentifier.ToLower().StartsWith("alpine"))
{
// alpine linux distro
os = "linux-musl-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
}
else
{
// other linux distro
os = "linux-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
}
}

if (platform == OSPlatform.OSX)
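The new branch keys off `RuntimeInformation.RuntimeIdentifier`, which on Alpine reports an "alpine"-prefixed RID such as "alpine.3.19-x64". A small standalone probe in the same spirit, offered as an assumption rather than the library's code; note that portable musl builds can instead report "linux-musl-x64", which the prefix check alone would not match:

```csharp
using System;
using System.Runtime.InteropServices;

var rid = RuntimeInformation.RuntimeIdentifier.ToLowerInvariant();

// Match both Alpine-specific RIDs ("alpine.3.19-x64") and portable
// musl RIDs ("linux-musl-x64") when deciding which natives to load.
bool isMusl = rid.StartsWith("alpine") || rid.Contains("musl");
Console.WriteLine(isMusl ? "linux-musl-x64" : "linux-x64");
```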