
November Binary Update #962


Merged: 7 commits, Nov 7, 2024

4 changes: 4 additions & 0 deletions .github/_typos.toml
@@ -14,3 +14,7 @@ extend-exclude = [
     "LLama.Benchmark/Assets/",
     "LLama.Examples/Assets/"
 ]
+
+[default.extend-words]
+# Used in a comment in SafeLLamaSamplerHandle.cs, as a prefix of "hello"
+teh = "hel"

1 change: 0 additions & 1 deletion LLama.Examples/Examples/CustomSampler.cs
@@ -60,7 +60,6 @@ protected override SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandl
         chain.AddCustom(new RemoveMostLikelyToken());
 
         // Select from the distribution
-        chain.AddSoftmax();
         chain.AddDistributionSampler(42);
 
         return chain;

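The removed AddSoftmax() call reflects the sampler rework in this llama.cpp update: the distribution sampler now computes the token probabilities itself, so a separate softmax stage is no longer needed. A minimal sketch of the example's chain after this change (the chain-creation line is assumed from the surrounding CustomSampler.cs example):

```csharp
// Sketch: the custom sampling pipeline without an explicit softmax stage.
protected override SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandle context)
{
    var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());

    // Custom sampler defined elsewhere in the example
    chain.AddCustom(new RemoveMostLikelyToken());

    // Select from the distribution (seed 42); probabilities are computed internally
    chain.AddDistributionSampler(42);

    return chain;
}
```
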
1 change: 0 additions & 1 deletion LLama/Extensions/LLamaExecutorExtensions.cs
@@ -150,7 +150,6 @@ private string CreatePrompt(IList<ChatMessage> messages)
             MinKeep = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinKeep), out int mk) is true ? mk : s_defaultPipeline.MinKeep,
             MinP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinP), out float mp) is true ? mp : s_defaultPipeline.MinP,
             Seed = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.Seed), out uint seed) is true ? seed : (uint)(t_random ??= new()).Next(),
-            TailFreeZ = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.TailFreeZ), out float tfz) is true ? tfz : s_defaultPipeline.TailFreeZ,
             Temperature = options?.Temperature ?? 0,
             TopP = options?.TopP ?? 0,
             TopK = options?.TopK ?? s_defaultPipeline.TopK,

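The TailFreeZ line is gone because tail-free sampling was removed from llama.cpp in this binary update, so there is no longer a pipeline parameter to map it to. The remaining settings are still read from ChatOptions.AdditionalProperties, keyed by DefaultSamplingPipeline property names. A hedged caller-side sketch, assuming the Microsoft.Extensions.AI ChatOptions type these extensions are written against:

```csharp
using Microsoft.Extensions.AI;

// Values must match the types the extension reads above (int, float, uint).
var options = new ChatOptions
{
    Temperature = 0.7f,
    AdditionalProperties = new AdditionalPropertiesDictionary
    {
        [nameof(DefaultSamplingPipeline.MinKeep)] = 1,
        [nameof(DefaultSamplingPipeline.MinP)] = 0.05f,
        [nameof(DefaultSamplingPipeline.Seed)] = 1234u,
        // A TailFreeZ entry would now simply be ignored.
    },
};
```
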
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
@@ -56,7 +56,7 @@
   </ItemGroup>
 
   <PropertyGroup>
-    <BinaryReleaseId>c35e586ea5722184</BinaryReleaseId>
+    <BinaryReleaseId>958367bf530d943a90</BinaryReleaseId>
   </PropertyGroup>
 
   <PropertyGroup>

3 changes: 2 additions & 1 deletion LLama/LLavaWeights.cs
@@ -9,7 +9,8 @@ namespace LLama;
 /// <summary>
 /// A set of llava model weights (mmproj), loaded into memory.
 /// </summary>
-public sealed class LLavaWeights : IDisposable
+public sealed class LLavaWeights
+    : IDisposable
 {
     /// <summary>
     /// The native handle, which is used in the native APIs

10 changes: 3 additions & 7 deletions LLama/Native/LLamaNativeBatch.cs
@@ -25,6 +25,7 @@ public unsafe struct LLamaNativeBatch
     /// <summary>
     /// the positions of the respective token in the sequence
+    /// (if set to NULL, the token position will be tracked automatically by llama_decode)
     /// </summary>
     public LLamaPos* pos;

@@ -35,18 +36,13 @@ public unsafe struct LLamaNativeBatch
     /// <summary>
     /// the sequence to which the respective token belongs
+    /// (if set to NULL, the sequence ID will be assumed to be 0)
     /// </summary>
     public LLamaSeqId** seq_id;
 
     /// <summary>
     /// if zero, the logits for the respective token will not be output
+    /// (if set to NULL, only the logits for last token will be returned)
     /// </summary>
     public byte* logits;
 
-    // Note from llama.cpp:
-    // > helpers for smooth API transition - can be deprecated in the future
-    // > for future-proof code, use the above fields instead and ignore everything below
-    private LLamaPos _all_pos_0;
-    private LLamaPos _all_pos_1;
-    private LLamaSeqId _all_seq_id;
 }

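The three new comments document llama.cpp's defaults for the optional llama_batch pointers, and the deleted _all_pos_0/_all_pos_1/_all_seq_id fields were the transitional helpers llama.cpp's own note flagged for removal; dropping them keeps the struct layout matched to the new native ABI. An illustrative helper (hypothetical, written only against the fields shown above) that spells out those defaults:

```csharp
// Hypothetical helper, for illustration: what llama.cpp does when the
// optional LLamaNativeBatch pointers are left null.
static unsafe void DescribeBatchDefaults(in LLamaNativeBatch batch)
{
    if (batch.pos == null)
        Console.WriteLine("pos == null: positions tracked automatically by llama_decode");

    if (batch.seq_id == null)
        Console.WriteLine("seq_id == null: all tokens assumed to belong to sequence 0");

    if (batch.logits == null)
        Console.WriteLine("logits == null: logits returned only for the last token");
}
```
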
5 changes: 5 additions & 0 deletions LLama/Native/LLamaPoolingType.cs
@@ -29,4 +29,9 @@ public enum LLamaPoolingType
     CLS = 2,
 
     Last = 3,
+
+    /// <summary>
+    /// Used by reranking models to attach the classification head to the graph
+    /// </summary>
+    Rank,
 }

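Rank mirrors LLAMA_POOLING_TYPE_RANK upstream. A hedged sketch of opting into it for a reranking model, assuming the context parameters expose a PoolingType setting (verify the property against the LLamaSharp version in use):

```csharp
// Sketch: request rank pooling so each sequence yields a classification score.
// "rerank-model.gguf" is a placeholder path.
var parameters = new ModelParams("rerank-model.gguf")
{
    PoolingType = LLamaPoolingType.Rank,
};
```
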
1 change: 1 addition & 0 deletions LLama/Native/LLamaVocabPreType.cs
@@ -33,4 +33,5 @@ internal enum LLamaVocabPreType
     BLOOM = 23,
     GPT3_FINNISH = 24,
     EXAONE = 25,
+    CHAMELEON = 26,
 }

186 changes: 0 additions & 186 deletions LLama/Native/NativeApi.Sampling.cs

This file was deleted.

8 changes: 8 additions & 0 deletions LLama/Native/NativeApi.cs
@@ -49,6 +49,14 @@ public static void llama_empty_call()
     [return: MarshalAs(UnmanagedType.U1)]
     public static extern bool llama_supports_gpu_offload();
 
+    /// <summary>
+    /// Check if RPC offload is supported
+    /// </summary>
+    /// <returns></returns>
+    [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+    [return: MarshalAs(UnmanagedType.U1)]
+    public static extern bool llama_supports_rpc();
+
     /// <summary>
     /// Initialize the llama + ggml backend. Call once at the start of the program.
     ///

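llama_supports_rpc sits alongside the existing capability probes. A short usage sketch:

```csharp
// Probe native capabilities at startup, using the bindings shown above.
if (NativeApi.llama_supports_gpu_offload())
    Console.WriteLine("GPU offload is available");

if (NativeApi.llama_supports_rpc())
    Console.WriteLine("RPC offload is available");
```
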
6 changes: 4 additions & 2 deletions LLama/Native/SafeLLamaContextHandle.cs
@@ -368,8 +368,10 @@ static SafeLLamaContextHandle()
     private static extern LLamaPoolingType llama_pooling_type(SafeLLamaContextHandle ctx);
 
     /// <summary>
-    /// Get the embeddings for the a specific sequence.
-    /// Equivalent to: llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
+    /// Get the embeddings for a sequence id.
+    /// Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
+    /// when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
+    /// otherwise: float[n_embd] (1-dimensional)
     /// </summary>
     /// <returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
     [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]

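With the updated comment, the return shape depends on the context's pooling type, so callers need a null check before indexing. A hedged consumer sketch (GetEmbeddingsSeq is a hypothetical managed wrapper; the real binding's declaration is truncated in this diff):

```csharp
// Hypothetical wrapper usage: interpret the pointer per the documented shapes.
static unsafe float ReadRankScore(SafeLLamaContextHandle ctx, LLamaSeqId seq)
{
    float* embd = GetEmbeddingsSeq(ctx, seq); // hypothetical wrapper over the extern above

    // NULL: the context uses LLAMA_POOLING_TYPE_NONE, so no sequence embedding exists.
    if (embd == null)
        throw new InvalidOperationException("Pooling type is NONE; no sequence embedding");

    // With LLAMA_POOLING_TYPE_RANK, the result is float[1] holding the rank score.
    return embd[0];
}
```
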