1 change: 1 addition & 0 deletions .gitignore
@@ -72,6 +72,7 @@ models-mnt
 /batched-bench
 /export-lora
 /finetune
+/retrieval
 /speculative
 /parallel
 /train-text-from-scratch
6 changes: 5 additions & 1 deletion Makefile
@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
+	retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o

 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -794,6 +794,10 @@ export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
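With the Makefile target above in place, the example builds like any other llama.cpp binary: `make retrieval` produces a `./retrieval` executable, which can then be pointed at an embedding model and one or more context files, e.g. `./retrieval -m <embedding-model.gguf> --context-files README.md --chunk-size 100` (the model path and flag values here are illustrative, not taken from the PR).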
42 changes: 42 additions & 0 deletions common/common.cpp
@@ -276,6 +276,43 @@ static bool gpt_params_find_arg(int argc, char ** argv, gpt_params & params, int
         }
         return true;
     }
+    if (arg == "--context-files") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        while(true) {
+            std::ifstream file(argv[i]);
+            if (!file) {
+                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
+                invalid_param = true;
+                break;
+            }
+            // store the external file name in params
+            params.context_files.push_back(argv[i]);
+            if (i + 1 >= argc || argv[i + 1][0] == '-') {
+                break;
+            }
+            i++;
+        }
+        return true;
+    }
+    if (arg == "--chunk-size") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.chunk_size = std::stoi(argv[i]);
+        return true;
+    }
+    if (arg == "--chunk-separator") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.chunk_separator = argv[i];
+        return true;
+    }
     if (arg == "-n" || arg == "--n-predict") {
         if (++i >= argc) {
             invalid_param = true;
@@ -1282,6 +1319,11 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("                        prompt file to start generation.\n");
     printf("  -bf FNAME, --binary-file FNAME\n");
     printf("                        binary file containing multiple choice tasks.\n");
+    printf("  --context-files FNAME1 FNAME2...\n");
+    printf("                        files containing context to embed.\n");
+    printf("  --chunk-size N        minimum length of embedded text chunk (default:%d)\n", params.chunk_size);
+    printf("  --chunk-separator STRING\n");
+    printf("                        string to separate chunks (default: \"\\n\")\n");
     printf("  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
     printf("  -c N, --ctx-size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
     printf("  -b N, --batch-size N  logical maximum batch size (default: %d)\n", params.n_batch);
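A note on the `--context-files` parsing above: the loop keeps consuming consecutive arguments until it reaches one that begins with `-` (or the end of argv), so an invocation like `--context-files a.txt b.txt --chunk-size 100` stores both file names before handing control back to the main parser. One visible consequence is that a file whose name itself starts with `-` would be mistaken for the next flag.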
3 changes: 3 additions & 0 deletions common/common.h
@@ -79,6 +79,9 @@ struct gpt_params {
     float   yarn_beta_slow    = 1.0f;  // YaRN high correction dim
     int32_t yarn_orig_ctx     = 0;     // YaRN original context length
     float   defrag_thold      = -1.0f; // KV cache defragmentation threshold
+    std::vector<std::string> context_files; // context files to embed
+    int32_t chunk_size = 64;                // chunk size for context embedding
+    std::string chunk_separator = "\n";     // chunk separator for context embedding

     ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;

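The two new fields suggest a straightforward chunking policy: split the input on `chunk_separator` and accumulate pieces until a chunk reaches at least `chunk_size` characters. The sketch below is a hypothetical illustration of that policy; the helper name and exact boundary handling are assumptions, not code from this PR.

```cpp
#include <string>
#include <vector>

// Hypothetical sketch: split `text` at each separator, accumulating pieces
// until a chunk is at least `chunk_size` characters long. Illustrative only;
// the retrieval example implements its own chunking.
static std::vector<std::string> chunk_text(const std::string & text, size_t chunk_size, const std::string & sep) {
    std::vector<std::string> chunks;
    if (sep.empty()) {
        chunks.push_back(text); // degenerate separator: treat input as one chunk
        return chunks;
    }
    std::string current;
    size_t pos = 0;
    while (pos < text.size()) {
        size_t next = text.find(sep, pos);
        if (next == std::string::npos) {
            next = text.size();
        }
        current += text.substr(pos, next - pos);
        pos = next + sep.size();
        if (current.size() >= chunk_size) { // minimum chunk length reached
            chunks.push_back(current);
            current.clear();
        }
    }
    if (!current.empty()) {
        chunks.push_back(current); // keep the trailing remainder
    }
    return chunks;
}
```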
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -34,6 +34,7 @@ else()
     add_subdirectory(perplexity)
     add_subdirectory(quantize)
     add_subdirectory(quantize-stats)
+    add_subdirectory(retrieval)
     add_subdirectory(save-load-state)
     add_subdirectory(simple)
     add_subdirectory(passkey)
5 changes: 5 additions & 0 deletions examples/retrieval/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(TARGET retrieval)
+add_executable(${TARGET} retrieval.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
6 changes: 6 additions & 0 deletions examples/retrieval/README.md
@@ -0,0 +1,6 @@
+# llama.cpp/examples/retrieval
+
+Demonstration of a simple retrieval technique based on cosine similarity
+
+More info:
+https://github.com/ggerganov/llama.cpp/pull/6193
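For reference, the cosine similarity the README mentions is the dot product of two embedding vectors divided by the product of their norms. A minimal C++ sketch of that computation follows; it is illustrative only, not the example's own code, which operates on embeddings produced by llama.cpp.

```cpp
#include <cmath>
#include <vector>

// Cosine similarity of two embedding vectors: dot(a, b) / (|a| * |b|).
// Vectors are assumed to have the same length; all-zero inputs return 0
// to avoid dividing by zero.
static float cosine_similarity(const std::vector<float> & a, const std::vector<float> & b) {
    float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
    for (size_t i = 0; i < a.size(); i++) {
        dot    += a[i] * b[i];
        norm_a += a[i] * a[i];
        norm_b += b[i] * b[i];
    }
    if (norm_a == 0.0f || norm_b == 0.0f) {
        return 0.0f;
    }
    return dot / (std::sqrt(norm_a) * std::sqrt(norm_b));
}
```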