diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index b56096b45e17c..4e61f43265e83 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -254,8 +254,9 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen") # extra artifacts -option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) -option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) +option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE}) +option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF) +option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE}) # # dependencies @@ -285,7 +286,9 @@ add_subdirectory(src) if (GGML_BUILD_TESTS) enable_testing() - add_subdirectory(tests) + if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests") + add_subdirectory(tests) + endif () endif () if (GGML_BUILD_EXAMPLES) diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index ab297e0c6f69f..88f21ae346534 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -243,6 +243,8 @@ extern "C" { // Load all known backends from dynamic libraries GGML_API void ggml_backend_load_all(void); GGML_API void ggml_backend_load_all_from_path(const char * dir_path); + // Load all CPU dynamic libraries and register them + GGML_API ggml_backend_reg_t * ggml_backend_load_cpu_variants(void); // // Backend scheduler diff --git a/ggml/include/ggml-cpu.h b/ggml/include/ggml-cpu.h index 9edd485136972..004acea318d4c 100644 --- a/ggml/include/ggml-cpu.h +++ b/ggml/include/ggml-cpu.h @@ -75,6 +75,7 @@ extern "C" { // // x86 + GGML_BACKEND_API int ggml_cpu_has_sse2 (void); GGML_BACKEND_API int ggml_cpu_has_sse3 (void); GGML_BACKEND_API int ggml_cpu_has_ssse3 (void); GGML_BACKEND_API int ggml_cpu_has_avx (void); diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index c8f3d8596427c..16e5271a21696 100644 --- a/ggml/src/CMakeLists.txt 
+++ b/ggml/src/CMakeLists.txt @@ -388,6 +388,20 @@ ggml_add_backend(WebGPU) ggml_add_backend(zDNN) ggml_add_backend(OpenCL) +if (GGML_CPU_REF_BACKEND) + if (NOT GGML_BACKEND_DL) + message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL") + endif() + set(GGML_SYSTEM_ARCH "cpu-ref") + set(GGML_LLAMAFILE OFF) + set(GGML_CPU_HBM OFF) + set(GGML_OPENMP OFF) + set(GGML_CPU_KLEIDIAI OFF) + set(GGML_CPU_REPACK OFF) + ggml_add_cpu_backend_variant(ref) + target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF) +endif() + foreach (target ggml-base ggml) target_include_directories(${target} PUBLIC $ $) target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 7002cb07e0015..521faa54fa6ba 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -596,4 +596,60 @@ void ggml_backend_load_all_from_path(const char * dir_path) { if (backend_path) { ggml_backend_load(backend_path); } +#ifdef GGML_USE_CPU_REF + ggml_backend_load_best("cpu-ref", silent, dir_path); +#endif +} + +ggml_backend_reg_t * ggml_backend_load_cpu_variants(void) { + // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths + const fs::path name_path = fs::u8path("cpu"); + const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native(); + const fs::path file_extension = backend_filename_extension(); + + std::vector search_paths; +#ifdef GGML_BACKEND_DIR + search_paths.push_back(fs::u8path(GGML_BACKEND_DIR)); +#endif + // default search paths: executable directory, current directory + search_paths.push_back(get_executable_path()); + search_paths.push_back(fs::current_path()); + + ggml_backend_reg_t * backends = nullptr; + size_t count = 0; + size_t capacity = 0; + for (const auto & search_path : search_paths) { + if (!fs::exists(search_path)) { + GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, 
path_str(search_path).c_str()); + continue; + } + fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied); + for (const auto & entry : dir_it) { + if (entry.is_regular_file()) { + auto filename = entry.path().filename(); + auto ext = entry.path().extension(); + if (filename.native().find(file_prefix.native()) == 0 && ext == file_extension) { + fs::path path = search_path / filename; + ggml_backend_reg_t backend = get_reg().load_backend(path, false); + if (backend) { + if (count >= capacity) { + capacity = capacity == 0 ? 4 : capacity * 2; + ggml_backend_reg_t * new_backends = (ggml_backend_reg_t *)realloc(backends, (capacity + 1) * sizeof(ggml_backend_reg_t)); + + if (!new_backends) { + free(backends); + return nullptr; + } + backends = new_backends; + } + backends[count++] = backend; + } + } + } + } + if (backends) { + backends[count] = nullptr; + } + return backends; } diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 79a5282be37c8..8057b04189e15 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1945,6 +1945,30 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s // allocate nodes ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend); + + // This is an absolute hack, but it is only to try to force the use of the + // extra repack buffers to see if I can come up with a better way or get + // some feedback from others on how to go about doing this. 
+ ggml_backend_buffer_t extra_buffer = nullptr; + std::vector extra_buft_list; + auto * dev = ggml_backend_get_device(backend); + auto * reg = ggml_backend_dev_backend_reg(dev); + auto get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_dev_get_extra_bufts"); + if (get_extra_bufts_fn) { + ggml_backend_buffer_type_t * extra_bufts = get_extra_bufts_fn(dev); + while (extra_bufts && *extra_bufts) { + extra_buft_list.push_back(*extra_bufts); + ++extra_bufts; + } + } + if (extra_buft_list.size() > 0) { + // Setting size to 1 just to ensure that the underlying extra buffer + // allocation is called. In the case of the repack buffer it does not + // really use the buffer and the repacking is done directly on the + // tensor data. + extra_buffer = ggml_backend_buft_alloc_buffer(extra_buft_list[0], 1); + } + + if (buffer == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__); ggml_hash_set_free(&hash_set); @@ -1952,6 +1976,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s free(node_init); ggml_free(ctx_allocated); ggml_free(ctx_unallocated); + ggml_backend_buffer_free(extra_buffer); return { /* .buffer = */ NULL, /* .ctx_allocated = */ NULL, @@ -1965,6 +1990,20 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s // copy data and init views for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; + + // Again just here to see if I can get the repacking to work. 
+ if (extra_buffer && !ggml_op_is_empty(node->op) && node->src[0]) { + auto dev = ggml_backend_buft_get_device(ggml_backend_buffer_get_type(extra_buffer)); + if (ggml_backend_dev_supports_op(dev, node)) { + size_t id = ggml_hash_find(&hash_set, node->src[0]); + ggml_status status = ggml_backend_buffer_init_tensor(extra_buffer, node_copies[id]); + if (status != GGML_STATUS_SUCCESS) { + GGML_LOG_ERROR("%s: failed to initialize tensor in extra buffer for graph copy\n", __func__); + } + node_copies[id]->buffer = extra_buffer; + } + } + graph_copy_init_tensor(&hash_set, node_copies, node_init, node); } @@ -1980,6 +2019,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s ggml_hash_set_free(&hash_set); free(node_copies); free(node_init); + ggml_backend_buffer_free(extra_buffer); return { /* .buffer = */ buffer, diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 369905750754f..83032e0ca11d9 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name) target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17) target_include_directories(${GGML_CPU_NAME} PRIVATE . 
ggml-cpu) + if (tag_name) + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU-${tag_name}") + else() + target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="CPU") + endif() + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index c131290849538..06c8f54df35fe 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -3428,6 +3428,14 @@ int ggml_cpu_has_llamafile(void) { #endif } +int ggml_cpu_has_sse2(void) { +#if defined(__SSE2__) + return 1; +#else + return 0; +#endif +} + int ggml_cpu_has_sse3(void) { #if defined(__SSE3__) return 1; diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index 81a314e4d68d7..6f42b0a8f4e87 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -98,7 +98,7 @@ struct ggml_backend_cpu_context { }; static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) { - return "CPU"; + return GGML_CPU_VARIANT_NAME; GGML_UNUSED(backend); } @@ -327,7 +327,7 @@ struct ggml_backend_cpu_device_context { }; static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) { - return "CPU"; + return GGML_CPU_VARIANT_NAME; GGML_UNUSED(dev); } @@ -506,6 +506,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r ggml_cpu_init(); std::vector features; + if (ggml_cpu_has_sse2()) { + features.push_back({ "SSE2", "1" }); + } if (ggml_cpu_has_sse3()) { features.push_back({ "SSE3", "1" }); } diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index f11eecd8e71a5..76e14ea5a2588 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -39,6 +39,7 @@ #include #include #include +#include static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { size_t nels = ggml_nelements(tensor); @@ -324,6 
+325,7 @@ enum test_mode { MODE_PERF, MODE_GRAD, MODE_SUPPORT, + MODE_CPU_VARIANTS, }; // Output format support similar to llama-bench @@ -1137,12 +1139,6 @@ struct test_case { // allocate ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1); - if (buf == NULL) { - printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1)); - ggml_free(ctx); - return false; - } - // build graph ggml_build_forward_expand(gf, out); @@ -6906,18 +6902,125 @@ static void show_test_coverage() { printf(" Coverage: %.1f%%\n", (double)covered_ops.size() / all_ops.size() * 100.0); } +static void print_backend_features(ggml_backend_t backend) { + auto device = ggml_backend_get_device(backend); + auto reg = ggml_backend_dev_backend_reg(device); + auto name = ggml_backend_dev_name(device); + auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features"); + if (get_features_fn) { + ggml_backend_feature * features = get_features_fn(reg); + printf("%s features:\n", name); + if (features->name == nullptr) { + printf(" (no features reported)\n"); + } else { + for (; features->name; features++) { + printf(" %s = %s\n", features->name, features->value); + } + } + } +} + +static bool test_cpu_variant(const char * variant_name, const char * op_names_filter, + const char * params_filter, printer * output_printer) { + std::string backend_ref_name = "CPU-ref"; + ggml_backend_reg_t * backend_regs = ggml_backend_load_cpu_variants(); + free(backend_regs); + + ggml_backend_t backend_ref = ggml_backend_init_by_name(backend_ref_name.c_str(), nullptr); + if (backend_ref == nullptr) { + printf("Error: CPU-ref backend not found. 
Make sure it's built and available.\n"); + return false; + } + print_backend_features(backend_ref); + + ggml_backend_t backend_variant = ggml_backend_init_by_name(variant_name, nullptr); + if (backend_variant == nullptr) { + printf("Error: CPU variant '%s' not found or failed to initialize.\n", variant_name); + printf("Use --list to see available variants.\n"); + ggml_backend_free(backend_ref); + return false; + } + print_backend_features(backend_variant); + + printf("Testing CPU variant '%s' against '%s' backend...\n\n", variant_name, backend_ref_name.c_str()); + + auto test_cases = make_test_cases_eval(); + + if (params_filter != nullptr) { + std::regex regex(params_filter); + auto it = test_cases.begin(); + while (it != test_cases.end()) { + std::string test_params = (*it)->vars(); + if (!std::regex_search(test_params, regex)) { + it = test_cases.erase(it); + } else { + it++; + } + } + } + + size_t n_ok = 0; + for (auto & test : test_cases) { + // Switch the order so that we copy from the reference backend to the + // variant backend. + if (test->eval(backend_ref, backend_variant, op_names_filter, output_printer)) { + n_ok++; + } + } + + output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false)); + + ggml_backend_free(backend_variant); + ggml_backend_free(backend_ref); + + return n_ok == test_cases.size(); +} + +static void list_cpu_variants() { + std::unordered_map variant_names; + ggml_backend_reg_t * backend_regs = ggml_backend_load_cpu_variants(); + if (backend_regs) { + for (ggml_backend_reg_t * reg = backend_regs; *reg != nullptr; ++reg) { + for (size_t j = 0; j < ggml_backend_reg_dev_count(*reg); j++) { + ggml_backend_dev_t dev = ggml_backend_reg_dev_get(*reg, j); + const char * name = ggml_backend_dev_name(dev); + if (strcmp(name, "CPU-ref") != 0) { + variant_names.emplace(name, ggml_backend_dev_description(dev)); + } + } + } + free(backend_regs); + } + + if (variant_names.size() == 0) { + printf("No CPU backend variants found. 
To enable CPU variants, rebuild with:\n"); + printf(" cmake -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON\n"); + return; + } + + printf("CPU variants:\n"); + for (const auto & it : variant_names) { + printf(" %-15s - %s\n", it.first.c_str(), it.second.c_str()); + } +} + static void usage(char ** argv) { - printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--show-coverage]\n", argv[0]); + printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ] [--list-ops] [--list-cpu-variants] [--show-coverage]\n", argv[0]); printf(" valid modes:\n"); printf(" - test (default, compare with CPU backend for correctness)\n"); printf(" - grad (compare gradients from backpropagation with method of finite differences)\n"); printf(" - perf (performance evaluation)\n"); printf(" - support (probe backend operation support)\n"); + printf(" - cpu-variants (test CPU variants against cpu-ref backend)\n"); printf(" op names for -o are as given by ggml_op_desc() (e.g. ADD, MUL_MAT, etc),\n"); printf(" optionally including the full test case string (e.g. 
\"ADD(type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1)\")\n"); printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); printf(" --list-ops lists all available GGML operations\n"); + printf(" --list-cpu-variants lists all available CPU backend variants\n"); printf(" --show-coverage shows test coverage\n"); + printf(" cpu-variants mode options:\n"); + printf(" --list lists available CPU variants on this system\n"); + printf(" --variant test specific CPU variant against cpu-ref backend\n"); } int main(int argc, char ** argv) { @@ -6926,6 +7029,7 @@ int main(int argc, char ** argv) { const char * op_names_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; + const char * cpu_variant_name = nullptr; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -6936,6 +7040,8 @@ int main(int argc, char ** argv) { mode = MODE_GRAD; } else if (strcmp(argv[i], "support") == 0) { mode = MODE_SUPPORT; + } else if (strcmp(argv[i], "cpu-variants") == 0) { + mode = MODE_CPU_VARIANTS; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { op_names_filter = argv[++i]; @@ -6970,6 +7076,16 @@ int main(int argc, char ** argv) { } else if (strcmp(argv[i], "--list-ops") == 0) { list_all_ops(); return 0; + } else if (strcmp(argv[i], "--list") == 0) { + list_cpu_variants(); + return 0; + } else if (strcmp(argv[i], "--variant") == 0) { + if (i + 1 < argc) { + cpu_variant_name = argv[++i]; + } else { + usage(argv); + return 1; + } } else if (strcmp(argv[i], "--show-coverage") == 0) { show_test_coverage(); return 0; @@ -6979,8 +7095,6 @@ int main(int argc, char ** argv) { } } - // load and enumerate backends - ggml_backend_load_all(); // Create printer for output format std::unique_ptr output_printer = create_printer(output_format); @@ -6988,6 +7102,19 @@ int main(int argc, char ** argv) { output_printer->print_header(); } + if (mode == MODE_CPU_VARIANTS) { + if (cpu_variant_name == nullptr) { + 
printf("Error: cpu-variants mode requires --variant or --list\n"); + usage(argv); + return 1; + } + + return test_cpu_variant(cpu_variant_name, op_names_filter, params_filter, output_printer.get()) ? 0 : 1; + } + + // load and enumerate backends + ggml_backend_load_all(); + output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count())); size_t n_ok = 0;