diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index cd114c0bb..83f1a888d 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -48,12 +48,12 @@ jobs: cd build cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }} cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v3 with: path: ./build/libllama.so name: llama-bin-linux-${{ matrix.build }}-x64.so - name: Upload Llava - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: ./build/examples/llava/libllava_shared.so name: llava-bin-linux-${{ matrix.build }}-x64.so @@ -89,13 +89,13 @@ jobs: cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: .\build\bin\Release\llama.dll name: llama-bin-win-${{ matrix.build }}-x64.dll - name: Upload Llava - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: .\build\bin\Release\llava_shared.dll name: llava-bin-win-${{ matrix.build }}-x64.dll @@ -169,20 +169,35 @@ jobs: ls -R - name: Upload artifacts (Windows) if: ${{ matrix.os == 'windows-latest' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: | .\build\bin\Release\llama.dll .\build\bin\Release\clblast.dll name: llama-bin-win-clblast-x64.dll + - name: Upload llava artifacts (Windows) + if: ${{ matrix.os == 'windows-latest' }} + uses: actions/upload-artifact@v3 + with: + path: | + .\build\bin\Release\llava_shared.dll + name: llava-bin-win-clblast-x64.dll - name: Upload artifacts (linux) if: ${{ matrix.os == 'ubuntu-22.04' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: | ./build/libllama.so # ./build/libclblast.so name: llama-bin-linux-clblast-x64.so + - name: Upload llava artifacts (linux) + if: ${{ matrix.os == 'ubuntu-22.04' }} + uses: 
actions/upload-artifact@v3 + with: + path: | + ./build/examples/llava/libllava_shared.so + name: llava-bin-linux-clblast-x64.so + compile-cublas: name: Compile (cublas) @@ -228,16 +243,29 @@ jobs: - name: Upload artifacts (Windows) if: ${{ matrix.os == 'windows-latest' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: .\build\bin\Release\llama.dll name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll + - name: Upload llava artifacts (Windows) + if: ${{ matrix.os == 'windows-latest' }} + uses: actions/upload-artifact@v3 + with: + path: .\build\bin\Release\llava_shared.dll + name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll - name: Upload artifacts (Linux) if: ${{ matrix.os == 'ubuntu-20.04' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: ./build/libllama.so name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so + - name: Upload llava artifacts (Linux) + if: ${{ matrix.os == 'ubuntu-20.04' }} + uses: actions/upload-artifact@v3 + with: + path: ./build/examples/llava/libllava_shared.so + name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so + compile-macos: name: Compile (MacOS) @@ -268,18 +296,18 @@ jobs: cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }} cmake --build . 
--config Release -j ${env:NUMBER_OF_PROCESSORS} - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: ./build/libllama.dylib name: llama-bin-osx-${{ matrix.build }}.dylib - name: Upload Llava - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: ./build/examples/llava/libllava_shared.dylib name: llava-bin-osx-${{ matrix.build }}.dylib - name: Upload Metal if: ${{ matrix.build != 'x64' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: ./build/bin/ggml-metal.metal name: ggml-metal.metal @@ -347,11 +375,12 @@ jobs: cp artifacts/llama-bin-linux-clblast-x64.so/libllama.so deps/clblast/ - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: path: deps/ name: deps + - name: Remove Artifacts uses: geekyeggo/delete-artifact@v2 with: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5d7377f1e..26d352079 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -28,14 +28,14 @@ jobs: os: windows-2019 config: release steps: - - uses: actions/checkout@v3 - - uses: actions/setup-dotnet@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-dotnet@v4 with: dotnet-version: | 7.0.x 8.0.x - name: Cache Packages - uses: actions/cache@v3 + uses: actions/cache@v4 with: key: "unit_test_models" path: LLama.Unittest/Models diff --git a/LLama.Unittest/Constants.cs b/LLama.Unittest/Constants.cs index 49e4906ea..6e6324491 100644 --- a/LLama.Unittest/Constants.cs +++ b/LLama.Unittest/Constants.cs @@ -3,5 +3,8 @@ internal static class Constants { public static string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf"; + public static string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf"; + public static string LLavaMmpPath = "Models/mmproj-model-f16.gguf"; + public static string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg"; } } diff --git 
a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj index 920e6e98f..f6bebea73 100644 --- a/LLama.Unittest/LLama.Unittest.csproj +++ b/LLama.Unittest/LLama.Unittest.csproj @@ -27,8 +27,9 @@ - - + + + @@ -44,5 +45,14 @@ PreserveNewest + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs index ca15dc6f4..9935fc863 100644 --- a/LLama.Unittest/LLamaEmbedderTests.cs +++ b/LLama.Unittest/LLamaEmbedderTests.cs @@ -14,6 +14,8 @@ public LLamaEmbedderTests(ITestOutputHelper testOutputHelper) _testOutputHelper = testOutputHelper; var @params = new ModelParams(Constants.ModelPath) { + ContextSize = 4096, + Threads = 5, EmbeddingMode = true, }; using var weights = LLamaWeights.LoadFromFile(@params); @@ -31,6 +33,7 @@ private static float Dot(float[] a, float[] b) return a.Zip(b, (x, y) => x * y).Sum(); } + [Fact] public async Task EmbedCompare() { diff --git a/LLama.Unittest/LLavaWeightsTests.cs b/LLama.Unittest/LLavaWeightsTests.cs new file mode 100644 index 000000000..beeb2fc49 --- /dev/null +++ b/LLama.Unittest/LLavaWeightsTests.cs @@ -0,0 +1,53 @@ +using LLama.Common; +using LLama.Native; + +namespace LLama.Unittest +{ + // Test the same things as llama model + image embedings + // + public sealed class LLavaWeightTests + : IDisposable + { + private readonly LLamaWeights _llamaWeights; + private readonly LLavaWeights _lLavaWeights; + private readonly LLamaContext _context; + + public LLavaWeightTests() + { + var @params = new ModelParams(Constants.ModelPath) + { + // Llava models requires big context + ContextSize = 4096 + }; + _llamaWeights = LLamaWeights.LoadFromFile(@params); + _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath); + + _context = _llamaWeights.CreateContext(@params); + + } + + public void Dispose() + { + _llamaWeights.Dispose(); + _lLavaWeights.Dispose(); + } + + + + [Fact] + public void EmbedImageAsFileName() + { + int 
n_past = 0; + Assert.True( _lLavaWeights.EmbedImage( _context, Constants.LLavaImage, ref n_past ) ); + } + + [Fact] + public void EmbedImageAsBinary() + { + int n_past = 0; + byte[] image = System.IO.File.ReadAllBytes(Constants.LLavaImage); + Assert.True( _lLavaWeights.EmbedImage( _context, image, ref n_past ) ); + } + + } +} diff --git a/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg b/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg new file mode 100644 index 000000000..078fde7c4 Binary files /dev/null and b/LLama.Unittest/Models/extreme-ironing-taxi-610x427.jpg differ diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index f26ad24e0..35534d3fb 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -67,5 +67,51 @@ PreserveNewest runtimes/osx-x64/native/libllama.dylib + + + PreserveNewest + runtimes/win-x64/native/noavx/llava_shared.dll + + + PreserveNewest + runtimes/win-x64/native/avx/llava_shared.dll + + + PreserveNewest + runtimes/win-x64/native/avx2/llava_shared.dll + + + PreserveNewest + runtimes/win-x64/native/avx512/llava_shared.dll + + + + PreserveNewest + runtimes/linux-x64/native/noavx/libllava_shared.so + + + PreserveNewest + runtimes/linux-x64/native/avx/libllava_shared.so + + + PreserveNewest + runtimes/linux-x64/native/avx2/libllava_shared.so + + + PreserveNewest + runtimes/linux-x64/native/avx512/libllava_shared.so + + + + PreserveNewest + runtimes/osx-arm64/native/libllava_shared.dylib + + + + PreserveNewest + runtimes/osx-x64/native/libllava_shared.dylib + + + \ No newline at end of file diff --git a/LLama/LLavaWeights.cs b/LLama/LLavaWeights.cs new file mode 100644 index 000000000..301fb7293 --- /dev/null +++ b/LLama/LLavaWeights.cs @@ -0,0 +1,51 @@ + +using System; +using LLama.Native; + +namespace LLama; + +public sealed class LLavaWeights : IDisposable +{ + public SafeLlavaModelHandle NativeHandle { get; } + + internal LLavaWeights(SafeLlavaModelHandle weights) + { 
+ NativeHandle = weights; + } + + public static LLavaWeights LoadFromFile(string mmProject) + { + var weights = SafeLlavaModelHandle.LoadFromFile(mmProject, 1); + return new LLavaWeights(weights); + } + + /// + /// Embed the image from file into llama context + /// + /// + /// + /// + /// + public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past ) + { + return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past ); + } + + /// + /// Embed the image from binary into llama context. + /// + /// + /// + /// + /// + public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past ) + { + return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past ); + } + + public void Dispose() + { + NativeHandle.Dispose(); + } + +} \ No newline at end of file diff --git a/LLama/Native/LLavaImageEmbed.cs b/LLama/Native/LLavaImageEmbed.cs new file mode 100644 index 000000000..2030515ec --- /dev/null +++ b/LLama/Native/LLavaImageEmbed.cs @@ -0,0 +1,13 @@ +using System.Runtime.InteropServices; + +namespace LLama.Native; + +/// +/// LLaVa Image embeddings +/// +[StructLayout(LayoutKind.Sequential)] +unsafe public struct LLavaImageEmbed +{ + public float* embed; + public int n_image_pos; +} \ No newline at end of file diff --git a/LLama/Native/NativeApi.LLava.cs b/LLama/Native/NativeApi.LLava.cs new file mode 100644 index 000000000..7930e3755 --- /dev/null +++ b/LLama/Native/NativeApi.LLava.cs @@ -0,0 +1,60 @@ +using System; +using System.Runtime.InteropServices; + +namespace LLama.Native; + +using clip_ctx = IntPtr; +public static unsafe partial class NativeApi +{ + /// + /// Sanity check for clip <-> llava embed size match + /// + /// + [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)] + public static extern bool llava_validate_embed_size( SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip); + + /// + /// Build an image embed from image file bytes + /// + /// + /// + /// + /// + /// + 
[DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes", + CallingConvention = CallingConvention.Cdecl)] + public static extern + SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes(SafeLlavaModelHandle ctx_clip, int n_threads, + byte[] image_bytes, int image_bytes_length); + + /// <summary> + /// Build an image embed from a path to an image filename + /// </summary> + /// <param name="ctx_clip">Handle to the loaded clip / multimodal projection model</param> + /// <param name="n_threads">Thread count forwarded to the native call</param> + /// <param name="image_path">Path of the image file, marshalled as an ANSI (LPStr) string</param> + /// <returns>Handle wrapping the native image embed</returns> + [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)] + public static extern + SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctx_clip, int n_threads, + [MarshalAs(UnmanagedType.LPStr)] string image_path); + + /// <summary> + /// Free an embedding made with llava_image_embed_make_* + /// </summary> + /// <param name="embed">Raw pointer to the native image embed to release</param> + /// <remarks>Declared void to match the native signature; there is no handle to marshal back</remarks> + [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)] + public static extern void llava_image_embed_free(IntPtr embed); + + /// + /// Write the image represented by embed into the llama context with batch size n_batch, starting at context + /// pos n_past. on completion, n_past points to the next position in the context after the image embed. 
+ /// + /// ctx_llama + /// + [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)] [return: MarshalAs(UnmanagedType.U1)] + public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, SafeLlavaImageEmbedHandle embed, + int n_batch, ref int n_past); + +} \ No newline at end of file diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs index c6c04e211..1b1868161 100644 --- a/LLama/Native/NativeApi.Load.cs +++ b/LLama/Native/NativeApi.Load.cs @@ -235,6 +235,7 @@ private static List GetLibraryTryOrder(NativeLibraryConfig.Description c if (platform == OSPlatform.OSX) { result.Add($"{prefix}{libraryNamePrefix}{libraryName}{suffix}"); + result.Add($"{prefix}{libraryNamePrefix}{llavaLibraryName}{suffix}"); } return result; @@ -303,6 +304,11 @@ string TryFindPath(string filename) if (result is not null && result != IntPtr.Zero) { Log($"{fullPath} is selected and loaded successfully.", LogLevel.Information); + + // Once detection has finished and llama has loaded successfully, also try to load LLaVa if it exists on the + // same path. 
+ TryLoad( libraryPath.Replace("llama", "llava_shared"), true); + return (IntPtr)result; } @@ -338,6 +344,7 @@ string TryFindPath(string filename) } internal const string libraryName = "llama"; + internal const string llavaLibraryName = "llava_shared"; private const string cudaVersionFile = "version.json"; private const string loggingPrefix = "[LLamaSharp Native]"; private static bool enableLogging = false; diff --git a/LLama/Native/SafeLlavaImageEmbedHandle.cs b/LLama/Native/SafeLlavaImageEmbedHandle.cs new file mode 100644 index 000000000..f1c62a2d1 --- /dev/null +++ b/LLama/Native/SafeLlavaImageEmbedHandle.cs @@ -0,0 +1,45 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using LLama; +using LLama.Exceptions; + + +namespace LLama.Native +{ + /// + /// A Reference to a set of llava Image Embed handle + /// + public sealed class SafeLlavaImageEmbedHandle + : SafeLLamaHandleBase + { + + private SafeLlavaImageEmbedHandle(IntPtr handle) + : base(handle, true) + { + } + + private SafeLlavaImageEmbedHandle() + {} + + public static SafeLlavaImageEmbedHandle CreateFromFileName( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, string image ) + { + return NativeApi.llava_image_embed_make_with_filename(ctxLlava, (int) ctxLlama.BatchThreads, image); + } + + public static SafeLlavaImageEmbedHandle CreateFromMemory( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, Byte[] image ) + { + return NativeApi.llava_image_embed_make_with_bytes(ctxLlava, (int) ctxLlama.BatchThreads, image, image.Length); + } + + /// + protected override bool ReleaseHandle() + { + NativeApi.llava_image_embed_free(DangerousGetHandle()); + SetHandle(IntPtr.Zero); + return true; + } + } +} diff --git a/LLama/Native/SafeLlavaModelHandle.cs b/LLama/Native/SafeLlavaModelHandle.cs new file mode 100644 index 000000000..f95440412 --- /dev/null +++ b/LLama/Native/SafeLlavaModelHandle.cs @@ -0,0 +1,104 @@ +using System; +using 
System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using LLama; +using LLama.Exceptions; + + +namespace LLama.Native +{ + /// <summary> + /// A reference to a set of llava model weights + /// </summary> + public sealed class SafeLlavaModelHandle + : SafeLLamaHandleBase + { + + private SafeLlavaModelHandle(IntPtr handle) + : base(handle, true) + { + } + + private SafeLlavaModelHandle() + {} + + /// <inheritdoc /> + protected override bool ReleaseHandle() + { + clip_free(DangerousGetHandle()); + SetHandle(IntPtr.Zero); + return true; + } + + /// <summary> + /// Load a model from the given file path into memory + /// </summary> + /// <param name="modelPath">Path of the multimodal projection (clip) model file</param> + /// <param name="verbosity">Verbosity level forwarded to clip_model_load</param> + /// <returns>Handle owning the loaded model</returns> + /// <exception cref="RuntimeError">The native loader did not return a valid model</exception> + public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity ) + { + + // Try to open the model file, this will check: + // - File exists (automatically throws FileNotFoundException) + // - File is readable (explicit check; opened with FileAccess.Read so read-only files are accepted) + // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases. + using (var fs = new FileStream(modelPath, FileMode.Open, FileAccess.Read)) + if (!fs.CanRead) + throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable"); + + var model = clip_model_load(modelPath, verbosity); // a failed load marshals as a non-null handle wrapping IntPtr.Zero, so test IsInvalid + return model != null && !model.IsInvalid ? model : 
throw new RuntimeError($"Failed to load LLaVa model {modelPath}."); + } + + /// <summary> + /// Embed the image from file in llama context + /// </summary> + /// <param name="ctxLlama">Llama context that receives the image embedding</param> + /// <param name="image">Path of the image file</param> + /// <param name="n_past">Context position passed by reference to llava_eval_image_embed, which may update it</param> + /// <returns>Result reported by llava_eval_image_embed</returns> + public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past) + { + using var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image); // released on return, after the embed has been evaluated + bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past ); + return result; + } + + /// <summary> + /// Embed the image from binary in llama context + /// </summary> + /// <param name="ctxLlama">Llama context that receives the image embedding</param> + /// <param name="image">jpeg image</param> + /// <param name="n_past">Context position passed by reference to llava_eval_image_embed, which may update it</param> + /// <returns>Result reported by llava_eval_image_embed</returns> + public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past ) + { + using var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image ); // released on return, after the embed has been evaluated + bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past ); + return result; + } + + /// <summary> + /// Load MULTI MODAL PROJECTIONS model / Clip Model + /// </summary> + /// <param name="mmProj"> Model path/file</param> + /// <param name="verbosity">Verbosity level</param> + /// <returns>SafeLlavaModelHandle</returns> + [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)] + private static extern SafeLlavaModelHandle clip_model_load(string mmProj, int verbosity); + + /// <summary> + /// Frees MULTI MODAL PROJECTIONS model / Clip Model + /// </summary> + /// <param name="ctx">Raw pointer to the native clip context to free</param> + [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)] + private static extern void clip_free(IntPtr ctx); + + + } +}