llama : let context be const when accessing const data (#1261)

grencez · web-flow · commit 90b19bd6eee9 · 2023-05-01T10:24:20.000+03:00
diff --git a/llama.cpp b/llama.cpp
@@ -2373,7 +2373,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
     }
 }
 
-int llama_get_kv_cache_token_count(struct llama_context * ctx) {
+int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     return ctx->model.kv_self.n;
 }
 
@@ -2387,7 +2387,7 @@ void llama_set_rng_seed(struct llama_context * ctx, int seed) {
 }
 
 // Returns the size of the state
-size_t llama_get_state_size(struct llama_context * ctx) {
+size_t llama_get_state_size(const struct llama_context * ctx) {
     // we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
     // for reference, std::mt19937(1337) serializes to 6701 bytes.
     const size_t s_rng_size        = sizeof(size_t);
@@ -2605,15 +2605,15 @@ int llama_tokenize(
     return res.size();
 }
 
-int llama_n_vocab(struct llama_context * ctx) {
+int llama_n_vocab(const struct llama_context * ctx) {
     return ctx->vocab.id_to_token.size();
 }
 
-int llama_n_ctx(struct llama_context * ctx) {
+int llama_n_ctx(const struct llama_context * ctx) {
     return ctx->model.hparams.n_ctx;
 }
 
-int llama_n_embd(struct llama_context * ctx) {
+int llama_n_embd(const struct llama_context * ctx) {
     return ctx->model.hparams.n_embd;
 }
 
@@ -2625,7 +2625,7 @@ float * llama_get_embeddings(struct llama_context * ctx) {
     return ctx->embedding.data();
 }
 
-const char * llama_token_to_str(struct llama_context * ctx, llama_token token) {
+const char * llama_token_to_str(const struct llama_context * ctx, llama_token token) {
     if (token >= llama_n_vocab(ctx)) {
         return nullptr;
     }
diff --git a/llama.h b/llama.h
@@ -120,13 +120,13 @@ extern "C" {
                              int   n_threads);
 
     // Returns the number of tokens in the KV cache
-    LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
+    LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
 
     // Sets the current rng seed.
     LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
 
     // Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
-    LLAMA_API size_t llama_get_state_size(struct llama_context * ctx);
+    LLAMA_API size_t llama_get_state_size(const struct llama_context * ctx);
 
     // Copies the state to the specified destination address.
     // Destination needs to have allocated enough memory.
@@ -164,9 +164,9 @@ extern "C" {
                              int   n_max_tokens,
                             bool   add_bos);
 
-    LLAMA_API int llama_n_vocab(struct llama_context * ctx);
-    LLAMA_API int llama_n_ctx  (struct llama_context * ctx);
-    LLAMA_API int llama_n_embd (struct llama_context * ctx);
+    LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
+    LLAMA_API int llama_n_embd (const struct llama_context * ctx);
 
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
@@ -180,7 +180,7 @@ extern "C" {
     LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
 
     // Token Id -> String. Uses the vocabulary in the provided context
-    LLAMA_API const char * llama_token_to_str(struct llama_context * ctx, llama_token token);
+    LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
 
     // Special tokens
     LLAMA_API llama_token llama_token_bos();

Original file line number	Diff line number	Diff line change
`@@ -2373,7 +2373,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor`
`2373`	`2373`	`}`
`2374`	`2374`	`}`
`2375`	`2375`
`2376`		`-int llama_get_kv_cache_token_count(struct llama_context * ctx) {`
	`2376`	`+int llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
`2377`	`2377`	`return ctx->model.kv_self.n;`
`2378`	`2378`	`}`
`2379`	`2379`
`@@ -2387,7 +2387,7 @@ void llama_set_rng_seed(struct llama_context * ctx, int seed) {`
`2387`	`2387`	`}`
`2388`	`2388`
`2389`	`2389`	`// Returns the size of the state`
`2390`		`-size_t llama_get_state_size(struct llama_context * ctx) {`
	`2390`	`+size_t llama_get_state_size(const struct llama_context * ctx) {`
`2391`	`2391`	`// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.`
`2392`	`2392`	`// for reference, std::mt19937(1337) serializes to 6701 bytes.`
`2393`	`2393`	`const size_t s_rng_size = sizeof(size_t);`
`@@ -2605,15 +2605,15 @@ int llama_tokenize(`
`2605`	`2605`	`return res.size();`
`2606`	`2606`	`}`
`2607`	`2607`
`2608`		`-int llama_n_vocab(struct llama_context * ctx) {`
	`2608`	`+int llama_n_vocab(const struct llama_context * ctx) {`
`2609`	`2609`	`return ctx->vocab.id_to_token.size();`
`2610`	`2610`	`}`
`2611`	`2611`
`2612`		`-int llama_n_ctx(struct llama_context * ctx) {`
	`2612`	`+int llama_n_ctx(const struct llama_context * ctx) {`
`2613`	`2613`	`return ctx->model.hparams.n_ctx;`
`2614`	`2614`	`}`
`2615`	`2615`
`2616`		`-int llama_n_embd(struct llama_context * ctx) {`
	`2616`	`+int llama_n_embd(const struct llama_context * ctx) {`
`2617`	`2617`	`return ctx->model.hparams.n_embd;`
`2618`	`2618`	`}`
`2619`	`2619`
`@@ -2625,7 +2625,7 @@ float * llama_get_embeddings(struct llama_context * ctx) {`
`2625`	`2625`	`return ctx->embedding.data();`
`2626`	`2626`	`}`
`2627`	`2627`
`2628`		`-const char * llama_token_to_str(struct llama_context * ctx, llama_token token) {`
	`2628`	`+const char * llama_token_to_str(const struct llama_context * ctx, llama_token token) {`
`2629`	`2629`	`if (token >= llama_n_vocab(ctx)) {`
`2630`	`2630`	`return nullptr;`
`2631`	`2631`	`}`