94 changes: 93 additions & 1 deletion .github/workflows/compile.yml
@@ -76,6 +76,72 @@ jobs:
name: llava-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error

compile-musl:
name: Compile (musl)
strategy:
fail-fast: true
matrix:
include:
- build: 'noavx'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-20.04
container:
image: alpine:latest
steps:
- name: Install dependencies
run: |
apk update && apk add --no-cache \
build-base \
cmake \
git \
linux-headers \
g++
- uses: actions/checkout@v4
with:
repository: ggerganov/llama.cpp
fetch-depth: 0
ref: '${{ github.event.inputs.llama_cpp_commit }}'
- name: Build
id: cmake_build_musl
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j $(nproc)
ls -R
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllama.so
name: llama-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml.so
name: ggml-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-base.so
name: ggml-base-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-cpu.so
name: ggml-cpu-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllava_shared.so
name: llava-bin-musl-${{ matrix.build }}-x64.so
if-no-files-found: error

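The four matrix flavours above mirror the glibc Linux job: the same sources are compiled four times with different x86 instruction-set ceilings. As a rough illustration of why all four are shipped, a host can probe CPU support at run time and load the strongest flavour available. The sketch below is not LLamaSharp's actual loader (that logic lives in NativeLibraryConfig/NativeLibraryUtils); it just shows the selection idea using the .NET intrinsics feature flags:

```csharp
// Sketch only: pick a native build flavour from CPU capabilities.
using System;
using System.Runtime.Intrinsics.X86;

static string PickNativeFlavour()
{
    if (Avx512F.IsSupported) return "avx512"; // AVX-512 flags require .NET 8+
    if (Avx2.IsSupported)    return "avx2";
    if (Avx.IsSupported)     return "avx";
    return "noavx";
}

Console.WriteLine($"Selected flavour: {PickNativeFlavour()}");
```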
compile-windows:
name: Compile (Windows)
strategy:
@@ -519,6 +585,7 @@ jobs:
if: ${{ always() }}
needs: [
"compile-linux",
"compile-musl",
"compile-windows",
"compile-vulkan",
"compile-cublas",
@@ -534,7 +601,7 @@ jobs:
- name: Rearrange Files
run: |
# Make all directories at once
mkdir --parents deps/{noavx,avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -561,6 +628,31 @@ jobs:
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

# Musl
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-noavx-x64.so/libggml-cpu.so deps/musl-noavx/libggml-cpu.so
cp artifacts/llama-bin-musl-noavx-x64.so/libllama.so deps/musl-noavx/libllama.so
cp artifacts/llava-bin-musl-noavx-x64.so/libllava_shared.so deps/musl-noavx/libllava_shared.so

cp artifacts/ggml-bin-musl-avx-x64.so/libggml.so deps/musl-avx/libggml.so
cp artifacts/ggml-base-bin-musl-avx-x64.so/libggml-base.so deps/musl-avx/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx-x64.so/libggml-cpu.so deps/musl-avx/libggml-cpu.so
cp artifacts/llama-bin-musl-avx-x64.so/libllama.so deps/musl-avx/libllama.so
cp artifacts/llava-bin-musl-avx-x64.so/libllava_shared.so deps/musl-avx/libllava_shared.so

cp artifacts/ggml-bin-musl-avx2-x64.so/libggml.so deps/musl-avx2/libggml.so
cp artifacts/ggml-base-bin-musl-avx2-x64.so/libggml-base.so deps/musl-avx2/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx2-x64.so/libggml-cpu.so deps/musl-avx2/libggml-cpu.so
cp artifacts/llama-bin-musl-avx2-x64.so/libllama.so deps/musl-avx2/libllama.so
cp artifacts/llava-bin-musl-avx2-x64.so/libllava_shared.so deps/musl-avx2/libllava_shared.so

cp artifacts/ggml-bin-musl-avx512-x64.so/libggml.so deps/musl-avx512/libggml.so
cp artifacts/ggml-base-bin-musl-avx512-x64.so/libggml-base.so deps/musl-avx512/libggml-base.so
cp artifacts/ggml-cpu-bin-musl-avx512-x64.so/libggml-cpu.so deps/musl-avx512/libggml-cpu.so
cp artifacts/llama-bin-musl-avx512-x64.so/libllama.so deps/musl-avx512/libllama.so
cp artifacts/llava-bin-musl-avx512-x64.so/libllava_shared.so deps/musl-avx512/libllava_shared.so

# Windows
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/noavx/ggml.dll
cp artifacts/ggml-base-bin-win-noavx-x64.dll/ggml-base.dll deps/noavx/ggml-base.dll
3 changes: 2 additions & 1 deletion LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -33,9 +33,10 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
{
ContextSize = config.ContextSize,
GpuLayerCount = config.GpuLayerCount ?? 20,
Embeddings = true,

PoolingType = LLamaPoolingType.Mean,
};

_weights = LLamaWeights.LoadFromFile(@params);
_embedder = new LLamaEmbedder(_weights, @params);
_ownsWeights = true;
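The swap above replaces the obsolete `Embeddings = true` flag with an explicit pooling mode. A hedged usage sketch of the same configuration: the model path is a placeholder, and the exact `GetEmbeddings` signature and return type vary between LLamaSharp releases.

```csharp
using LLama;
using LLama.Common;
using LLama.Native;

// Placeholder path; any embedding-capable GGUF model works here.
var @params = new ModelParams("path/to/model.gguf")
{
    ContextSize = 2048,
    PoolingType = LLamaPoolingType.Mean, // mean pooling replaces Embeddings = true
};

using var weights = LLamaWeights.LoadFromFile(@params);
using var embedder = new LLamaEmbedder(weights, @params);

// Recent releases return one pooled vector per input sequence;
// the exact return type differs across versions.
var embeddings = await embedder.GetEmbeddings("Hello, world");
```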
2 changes: 1 addition & 1 deletion LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
@@ -22,7 +22,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
_testOutputHelper = testOutputHelper;

_infParams = new() { AntiPrompts = ["\n\n"] };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512 };

testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
}
14 changes: 14 additions & 0 deletions LLama.Unittest/LLamaContextTests.cs
@@ -14,6 +14,10 @@ public LLamaContextTests()
var @params = new ModelParams(Constants.GenerativeModelPath2)
{
ContextSize = 128,
BatchSize = 8,
UBatchSize = 8,
SeqMax = 1,
VocabOnly = false,
GpuLayerCount = Constants.CIGpuLayerCount,
};
_weights = LLamaWeights.LoadFromFile(@params);
@@ -84,6 +88,11 @@ public void TokenizeEmpty()
[Fact]
public void SaveLoadState()
{
// Make sure there's something in the context worth saving
var batch = new LLamaBatch();
batch.Add(17, 0, LLamaSeqId.Zero, true);
_context.Decode(batch);

using var state1 = _context.GetState();

var stream = new MemoryStream();
@@ -99,6 +108,11 @@ public void SaveLoadState()
[Fact]
public async Task SaveLoadStateAsync()
{
// Make sure there's something in the context worth saving
var batch = new LLamaBatch();
batch.Add(17, 0, LLamaSeqId.Zero, true);
_context.Decode(batch);

using var state1 = _context.GetState();

var stream = new MemoryStream();
2 changes: 1 addition & 1 deletion LLama/Batched/Conversation.cs
@@ -128,7 +128,7 @@ public Conversation Fork()
_forked = true;

// Assign tokens to the new sequence
NativeApi.llama_kv_cache_seq_cp(Executor.Context.NativeHandle, ConversationId, c.ConversationId, 0, _end);
Executor.Context.NativeHandle.KvCacheSequenceCopy(ConversationId, c.ConversationId, 0, _end);

return c;
}
4 changes: 2 additions & 2 deletions LLama/LLamaExecutorBase.cs
@@ -193,8 +193,8 @@ protected virtual void HandleRunOutOfContext(int tokensToKeep)
int n_left = _pastTokensCount - tokensToKeep;
int n_discard = n_left / 2;

NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep, tokensToKeep + n_discard);
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensToKeep + n_discard, _pastTokensCount, -n_discard);

_pastTokensCount -= n_discard;
// stop saving session if we run out of context
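The renamed `llama_kv_self_seq_rm`/`llama_kv_self_seq_add` pair implements the same context shift as before; only the native entry points changed. A worked example of the arithmetic, as a standalone sketch rather than library code:

```csharp
// Standalone sketch of the shift above.
int pastTokensCount = 100; // _pastTokensCount
int tokensToKeep    = 20;  // e.g. the prompt prefix to preserve

int n_left    = pastTokensCount - tokensToKeep; // 80 tokens beyond the keep point
int n_discard = n_left / 2;                     // 40 tokens to drop

// llama_kv_self_seq_rm(ctx, seq, 20, 60):        cells [20, 60) are evicted.
// llama_kv_self_seq_add(ctx, seq, 60, 100, -40): cells [60, 100) slide back to [20, 60).
pastTokensCount -= n_discard;

System.Console.WriteLine(
    $"kept {tokensToKeep}, discarded {n_discard}, cache now holds {pastTokensCount} tokens");
```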
70 changes: 70 additions & 0 deletions LLama/LLamaSharp.Runtime.targets
@@ -200,6 +200,7 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-x64/native/avx512/libggml-cpu.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
@@ -253,6 +254,75 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-x64/native/vulkan/libggml-vulkan.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-noavx/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/noavx/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx2/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx2/libggml-cpu.so</Link>
</None>

<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libllama.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libllama.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-base.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml-base.so</Link>
</None>
<None Include="$(MSBuildThisFileDirectory)runtimes/deps/musl-avx512/libggml-cpu.so">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Link>runtimes/linux-musl-x64/native/avx512/libggml-cpu.so</Link>
</None>


<None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libggml-base.dylib">
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
@@ -56,7 +56,7 @@
</ItemGroup>

<PropertyGroup>
<BinaryReleaseId>5783575c9d99</BinaryReleaseId>
<BinaryReleaseId>be7c3034108473be</BinaryReleaseId>
</PropertyGroup>

<PropertyGroup>
4 changes: 2 additions & 2 deletions LLama/LLamaStatelessExecutor.cs
@@ -155,8 +155,8 @@ public async IAsyncEnumerable<string> InferAsync(string prompt, IInferenceParams
var n_left = n_past - tokensKeep;
var n_discard = n_left / 2;

NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);
NativeApi.llama_kv_self_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard);
NativeApi.llama_kv_self_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard);

n_past -= n_discard;
}
10 changes: 10 additions & 0 deletions LLama/Native/LLamaKvCache.cs
@@ -0,0 +1,10 @@
namespace LLama.Native;

/// <summary>
/// C# representation of llama_kv_cache
/// </summary>
/// <remarks>llama_kv_cache</remarks>
internal struct LLamaKvCacheNative
{

}
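The empty struct stands in for an opaque native type: C# code never constructs one, it only passes pointers to it across the interop boundary. A purely hypothetical illustration of that pattern (`llama_kv_cache_example` is a made-up export name for the sketch, not part of the real llama.cpp API):

```csharp
using System;
using System.Runtime.InteropServices;

internal static unsafe class KvCacheSketch
{
    // Made-up entry point, for illustration only: opaque structs such as
    // LLamaKvCacheNative are always handled via pointer, never by value.
    [DllImport("llama", EntryPoint = "llama_kv_cache_example",
               CallingConvention = CallingConvention.Cdecl)]
    public static extern LLamaKvCacheNative* GetKvCache(IntPtr ctx);
}
```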
5 changes: 4 additions & 1 deletion LLama/Native/LLamaVocabPreType.cs
@@ -4,6 +4,7 @@ namespace LLama.Native;
///
/// </summary>
/// <remarks>llama_vocab_pre_type</remarks>
// ReSharper disable InconsistentNaming
internal enum LLamaVocabPreType
{
Default = 0,
@@ -36,4 +37,6 @@ internal enum LLamaVocabPreType
CHAMELEON = 26,
MINERVA = 27,
DEEPSEEK3_LLM = 28,
}
GPT4O = 29,
}
// ReSharper restore InconsistentNaming
20 changes: 16 additions & 4 deletions LLama/Native/Load/NativeLibraryUtils.cs
@@ -218,10 +218,22 @@ public static void GetPlatformPathParts(OSPlatform platform, out string os, out

if (platform == OSPlatform.Linux)
{
os = "linux-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
if(RuntimeInformation.RuntimeIdentifier.ToLower().StartsWith("alpine"))
{
// alpine linux distro
os = "linux-musl-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
}
else
{
// other linux distro
os = "linux-x64";
fileExtension = ".so";
libPrefix = "lib";
return;
}
}

if (platform == OSPlatform.OSX)
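The new branch keys off `RuntimeInformation.RuntimeIdentifier`, which on Alpine reports an "alpine"-prefixed RID such as "alpine.3.19-x64". A small standalone probe in the same spirit, offered as an assumption rather than the library's code; note that portable musl builds can instead report "linux-musl-x64", which the prefix check alone would not match:

```csharp
using System;
using System.Runtime.InteropServices;

var rid = RuntimeInformation.RuntimeIdentifier.ToLowerInvariant();

// Match both Alpine-specific RIDs ("alpine.3.19-x64") and portable
// musl RIDs ("linux-musl-x64") when deciding which natives to load.
bool isMusl = rid.StartsWith("alpine") || rid.Contains("musl");
Console.WriteLine(isMusl ? "linux-musl-x64" : "linux-x64");
```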