diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index cd114c0bb..83f1a888d 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -48,12 +48,12 @@ jobs:
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v3
with:
path: ./build/libllama.so
name: llama-bin-linux-${{ matrix.build }}-x64.so
- name: Upload Llava
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: ./build/examples/llava/libllava_shared.so
name: llava-bin-linux-${{ matrix.build }}-x64.so
@@ -89,13 +89,13 @@ jobs:
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-${{ matrix.build }}-x64.dll
- name: Upload Llava
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: .\build\bin\Release\llava_shared.dll
name: llava-bin-win-${{ matrix.build }}-x64.dll
@@ -169,20 +169,35 @@ jobs:
ls -R
- name: Upload artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: |
.\build\bin\Release\llama.dll
.\build\bin\Release\clblast.dll
name: llama-bin-win-clblast-x64.dll
+ - name: Upload llava artifacts (Windows)
+ if: ${{ matrix.os == 'windows-latest' }}
+ uses: actions/upload-artifact@v3
+ with:
+ path: |
+ .\build\bin\Release\llava_shared.dll
+ name: llava-bin-win-clblast-x64.dll
- name: Upload artifacts (linux)
if: ${{ matrix.os == 'ubuntu-22.04' }}
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: |
./build/libllama.so
# ./build/libclblast.so
name: llama-bin-linux-clblast-x64.so
+ - name: Upload llava artifacts (linux)
+ if: ${{ matrix.os == 'ubuntu-22.04' }}
+ uses: actions/upload-artifact@v3
+ with:
+ path: |
+ ./build/examples/llava/libllava_shared.so
+ name: llava-bin-linux-clblast-x64.so
+
compile-cublas:
name: Compile (cublas)
@@ -228,16 +243,29 @@ jobs:
- name: Upload artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
+ - name: Upload llava artifacts (Windows)
+ if: ${{ matrix.os == 'windows-latest' }}
+ uses: actions/upload-artifact@v3
+ with:
+ path: .\build\bin\Release\llava_shared.dll
+ name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
- name: Upload artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-20.04' }}
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: ./build/libllama.so
name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
+ - name: Upload llava artifacts (Linux)
+ if: ${{ matrix.os == 'ubuntu-20.04' }}
+ uses: actions/upload-artifact@v3
+ with:
+ path: ./build/examples/llava/libllava_shared.so
+ name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
+
compile-macos:
name: Compile (MacOS)
@@ -268,18 +296,18 @@ jobs:
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: ./build/libllama.dylib
name: llama-bin-osx-${{ matrix.build }}.dylib
- name: Upload Llava
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: ./build/examples/llava/libllava_shared.dylib
name: llava-bin-osx-${{ matrix.build }}.dylib
- name: Upload Metal
if: ${{ matrix.build != 'x64' }}
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: ./build/bin/ggml-metal.metal
name: ggml-metal.metal
@@ -347,11 +375,12 @@ jobs:
cp artifacts/llama-bin-linux-clblast-x64.so/libllama.so deps/clblast/
- name: Upload artifacts
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v3
with:
path: deps/
name: deps
+
- name: Remove Artifacts
uses: geekyeggo/delete-artifact@v2
with:
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5d7377f1e..26d352079 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -28,14 +28,14 @@ jobs:
os: windows-2019
config: release
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-dotnet@v3
+ - uses: actions/checkout@v4
+ - uses: actions/setup-dotnet@v4
with:
dotnet-version: |
7.0.x
8.0.x
- name: Cache Packages
- uses: actions/cache@v3
+ uses: actions/cache@v4
with:
key: "unit_test_models"
path: LLama.Unittest/Models
diff --git a/LLama.Unittest/Constants.cs b/LLama.Unittest/Constants.cs
index 49e4906ea..6e6324491 100644
--- a/LLama.Unittest/Constants.cs
+++ b/LLama.Unittest/Constants.cs
@@ -3,5 +3,8 @@
internal static class Constants
{
public static string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
+ public static string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
+ public static string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
+ public static string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
}
}
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 920e6e98f..f6bebea73 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -27,8 +27,9 @@
-
-
+
+
+
@@ -44,5 +45,14 @@
PreserveNewest
+
+ PreserveNewest
+
+
+ PreserveNewest
+
+
+ PreserveNewest
+
diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs
index ca15dc6f4..9935fc863 100644
--- a/LLama.Unittest/LLamaEmbedderTests.cs
+++ b/LLama.Unittest/LLamaEmbedderTests.cs
@@ -14,6 +14,8 @@ public LLamaEmbedderTests(ITestOutputHelper testOutputHelper)
_testOutputHelper = testOutputHelper;
var @params = new ModelParams(Constants.ModelPath)
{
+ ContextSize = 4096,
+ Threads = 5,
EmbeddingMode = true,
};
using var weights = LLamaWeights.LoadFromFile(@params);
@@ -31,6 +33,7 @@ private static float Dot(float[] a, float[] b)
return a.Zip(b, (x, y) => x * y).Sum();
}
+
[Fact]
public async Task EmbedCompare()
{
diff --git a/LLama.Unittest/LLavaWeightsTests.cs b/LLama.Unittest/LLavaWeightsTests.cs
new file mode 100644
index 000000000..beeb2fc49
--- /dev/null
+++ b/LLama.Unittest/LLavaWeightsTests.cs
@@ -0,0 +1,53 @@
+using LLama.Common;
+using LLama.Native;
+
+namespace LLama.Unittest
+{
+ // Test the same things as llama model + image embeddings
+ //
+ public sealed class LLavaWeightTests
+ : IDisposable
+ {
+ private readonly LLamaWeights _llamaWeights;
+ private readonly LLavaWeights _lLavaWeights;
+ private readonly LLamaContext _context;
+
+ public LLavaWeightTests()
+ {
+ var @params = new ModelParams(Constants.ModelPath)
+ {
+ // Llava models require a big context
+ ContextSize = 4096
+ };
+ _llamaWeights = LLamaWeights.LoadFromFile(@params);
+ _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath);
+
+ _context = _llamaWeights.CreateContext(@params);
+
+ }
+
+ public void Dispose()
+ {
+ _llamaWeights.Dispose();
+ _lLavaWeights.Dispose();
+ }
+
+
+
+ [Fact]
+ public void EmbedImageAsFileName()
+ {
+ int n_past = 0;
+ Assert.True( _lLavaWeights.EmbedImage( _context, Constants.LLavaImage, ref n_past ) );
+ }
+
+ [Fact]
+ public void EmbedImageAsBinary()
+ {
+ int n_past = 0;
+ byte[] image = System.IO.File.ReadAllBytes(Constants.LLavaImage);
+ Assert.True( _lLavaWeights.EmbedImage( _context, image, ref n_past ) );
+ }
+
+ }
+}
diff --git a/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg b/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg
new file mode 100644
index 000000000..078fde7c4
Binary files /dev/null and b/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg differ
diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index f26ad24e0..35534d3fb 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -67,5 +67,51 @@
PreserveNewest
runtimes/osx-x64/native/libllama.dylib
+
+
+ PreserveNewest
+ runtimes/win-x64/native/noavx/llava_shared.dll
+
+
+ PreserveNewest
+ runtimes/win-x64/native/avx/llava_shared.dll
+
+
+ PreserveNewest
+ runtimes/win-x64/native/avx2/llava_shared.dll
+
+
+ PreserveNewest
+ runtimes/win-x64/native/avx512/llava_shared.dll
+
+
+
+ PreserveNewest
+ runtimes/linux-x64/native/noavx/libllava_shared.so
+
+
+ PreserveNewest
+ runtimes/linux-x64/native/avx/libllava_shared.so
+
+
+ PreserveNewest
+ runtimes/linux-x64/native/avx2/libllava_shared.so
+
+
+ PreserveNewest
+ runtimes/linux-x64/native/avx512/libllava_shared.so
+
+
+
+ PreserveNewest
+ runtimes/osx-arm64/native/libllava_shared.dylib
+
+
+
+ PreserveNewest
+ runtimes/osx-x64/native/libllava_shared.dylib
+
+
+
\ No newline at end of file
diff --git a/LLama/LLavaWeights.cs b/LLama/LLavaWeights.cs
new file mode 100644
index 000000000..301fb7293
--- /dev/null
+++ b/LLama/LLavaWeights.cs
@@ -0,0 +1,51 @@
+
+using System;
+using LLama.Native;
+
+namespace LLama;
+
+public sealed class LLavaWeights : IDisposable
+{
+ public SafeLlavaModelHandle NativeHandle { get; }
+
+ internal LLavaWeights(SafeLlavaModelHandle weights)
+ {
+ NativeHandle = weights;
+ }
+
+ public static LLavaWeights LoadFromFile(string mmProject)
+ {
+ var weights = SafeLlavaModelHandle.LoadFromFile(mmProject, 1);
+ return new LLavaWeights(weights);
+ }
+
+ ///
+ /// Embed the image from file into llama context
+ ///
+ ///
+ ///
+ ///
+ ///
+ public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
+ {
+ return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
+ }
+
+ ///
+ /// Embed the image from binary into llama context.
+ ///
+ ///
+ ///
+ ///
+ ///
+ public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past )
+ {
+ return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
+ }
+
+ public void Dispose()
+ {
+ NativeHandle.Dispose();
+ }
+
+}
\ No newline at end of file
diff --git a/LLama/Native/LLavaImageEmbed.cs b/LLama/Native/LLavaImageEmbed.cs
new file mode 100644
index 000000000..2030515ec
--- /dev/null
+++ b/LLama/Native/LLavaImageEmbed.cs
@@ -0,0 +1,13 @@
+using System.Runtime.InteropServices;
+
+namespace LLama.Native;
+
+///
+/// LLaVa Image embeddings
+///
+[StructLayout(LayoutKind.Sequential)]
+unsafe public struct LLavaImageEmbed
+{
+ public float* embed;
+ public int n_image_pos;
+}
\ No newline at end of file
diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs
new file mode 100644
index 000000000..7930e3755
--- /dev/null
+++ b/LLama/Native/NativeApi.LLava.cs
@@ -0,0 +1,60 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace LLama.Native;
+
+using clip_ctx = IntPtr;
+public static unsafe partial class NativeApi
+{
+ ///
+ /// Sanity check for clip <-> llava embed size match
+ ///
+ ///
+ [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
+ public static extern bool llava_validate_embed_size( SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip);
+
+ ///
+ /// Build an image embed from image file bytes
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
+ CallingConvention = CallingConvention.Cdecl)]
+ public static extern
+ SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes(SafeLlavaModelHandle ctx_clip, int n_threads,
+ byte[] image_bytes, int image_bytes_length);
+
+ ///
+ /// Build an image embed from a path to an image filename
+ ///
+ ///
+ ///
+ ///
+ ///
+ [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)]
+ public static extern
+ SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctx_clip, int n_threads,
+ [MarshalAs(UnmanagedType.LPStr)] string image_path);
+
+ ///
+ /// Free an embedding made with llava_image_embed_make_*
+ ///
+ ///
+ ///
+ [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
+ public static extern SafeLlavaImageEmbedHandle llava_image_embed_free(IntPtr embed);
+
+ ///
+ /// Write the image represented by embed into the llama context with batch size n_batch, starting at context
+ /// pos n_past. on completion, n_past points to the next position in the context after the image embed.
+ ///
+ /// ctx_llama
+ ///
+ [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
+ public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, SafeLlavaImageEmbedHandle embed,
+ int n_batch, ref int n_past);
+
+}
\ No newline at end of file
diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs
index c6c04e211..1b1868161 100644
--- a/LLama/Native/NativeApi.Load.cs
+++ b/LLama/Native/NativeApi.Load.cs
@@ -235,6 +235,7 @@ private static List GetLibraryTryOrder(NativeLibraryConfig.Description c
if (platform == OSPlatform.OSX)
{
result.Add($"{prefix}{libraryNamePrefix}{libraryName}{suffix}");
+ result.Add($"{prefix}{libraryNamePrefix}{llavaLibraryName}{suffix}");
}
return result;
@@ -303,6 +304,11 @@ string TryFindPath(string filename)
if (result is not null && result != IntPtr.Zero)
{
Log($"{fullPath} is selected and loaded successfully.", LogLevel.Information);
+
+ // Once detection is complete and llama has loaded successfully, we load LLaVa if it exists on the
+ // same path.
+ TryLoad( libraryPath.Replace("llama", "llava_shared"), true);
+
return (IntPtr)result;
}
@@ -338,6 +344,7 @@ string TryFindPath(string filename)
}
internal const string libraryName = "llama";
+ internal const string llavaLibraryName = "llava_shared";
private const string cudaVersionFile = "version.json";
private const string loggingPrefix = "[LLamaSharp Native]";
private static bool enableLogging = false;
diff --git a/LLama/Native/SafeLlavaImageEmbedHandle.cs b/LLama/Native/SafeLlavaImageEmbedHandle.cs
new file mode 100644
index 000000000..f1c62a2d1
--- /dev/null
+++ b/LLama/Native/SafeLlavaImageEmbedHandle.cs
@@ -0,0 +1,45 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using LLama;
+using LLama.Exceptions;
+
+
+namespace LLama.Native
+{
+ ///
+ /// A Reference to a set of llava Image Embed handle
+ ///
+ public sealed class SafeLlavaImageEmbedHandle
+ : SafeLLamaHandleBase
+ {
+
+ private SafeLlavaImageEmbedHandle(IntPtr handle)
+ : base(handle, true)
+ {
+ }
+
+ private SafeLlavaImageEmbedHandle()
+ {}
+
+ public static SafeLlavaImageEmbedHandle CreateFromFileName( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, string image )
+ {
+ return NativeApi.llava_image_embed_make_with_filename(ctxLlava, (int) ctxLlama.BatchThreads, image);
+ }
+
+ public static SafeLlavaImageEmbedHandle CreateFromMemory( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, Byte[] image )
+ {
+ return NativeApi.llava_image_embed_make_with_bytes(ctxLlava, (int) ctxLlama.BatchThreads, image, image.Length);
+ }
+
+ ///
+ protected override bool ReleaseHandle()
+ {
+ NativeApi.llava_image_embed_free(DangerousGetHandle());
+ SetHandle(IntPtr.Zero);
+ return true;
+ }
+ }
+}
diff --git a/LLama/Native/SafeLlavaModelHandle.cs b/LLama/Native/SafeLlavaModelHandle.cs
new file mode 100644
index 000000000..f95440412
--- /dev/null
+++ b/LLama/Native/SafeLlavaModelHandle.cs
@@ -0,0 +1,104 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+using LLama;
+using LLama.Exceptions;
+
+
+namespace LLama.Native
+{
+ ///
+ /// A reference to a set of llava model weights
+ ///
+ public sealed class SafeLlavaModelHandle
+ : SafeLLamaHandleBase
+ {
+
+ private SafeLlavaModelHandle(IntPtr handle)
+ : base(handle, true)
+ {
+ }
+
+ private SafeLlavaModelHandle()
+ {}
+
+ ///
+ protected override bool ReleaseHandle()
+ {
+ clip_free(DangerousGetHandle());
+ SetHandle(IntPtr.Zero);
+ return true;
+ }
+
+ ///
+ /// Load a model from the given file path into memory
+ ///
+ ///
+ ///
+ ///
+ ///
+ public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity )
+ {
+
+ // Try to open the model file, this will check:
+ // - File exists (automatically throws FileNotFoundException)
+ // - File is readable (explicit check)
+ // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases.
+ using (var fs = new FileStream(modelPath, FileMode.Open))
+ if (!fs.CanRead)
+ throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable");
+
+ return clip_model_load(modelPath, verbosity)
+ ?? throw new RuntimeError($"Failed to load LLaVa model {modelPath}.");
+ }
+
+ ///
+ /// Embed the image from file in llama context
+ ///
+ ///
+ ///
+ ///
+ ///
+ public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past)
+ {
+ var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image);
+ bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past );
+ return result;
+ }
+
+ ///
+ /// Embed the image from binary in llama context
+ ///
+ ///
+ /// jpeg image
+ ///
+ ///
+ public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past )
+ {
+ var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image );
+ bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past );
+ return result;
+ }
+
+ ///
+ /// Load MULTI MODAL PROJECTIONS model / Clip Model
+ ///
+ /// Model path/file
+ /// Verbosity level
+ /// SafeLlavaModelHandle
+ [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)]
+ private static extern SafeLlavaModelHandle clip_model_load(string mmProj, int verbosity);
+
+ ///
+ /// Frees MULTI MODAL PROJECTIONS model / Clip Model
+ ///
+ ///
+ [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)]
+ private static extern void clip_free(IntPtr ctx);
+
+
+ }
+}