Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/quantize/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
add_executable(quantize quantize.cpp)
target_link_libraries(quantize PRIVATE ggml tts)

# The snapshot test is a bash script, so it is only staged on non-Windows hosts.
if (NOT WIN32)
    # Always-built target that pulls in the copy rule below, so the script is
    # refreshed next to the quantize binary as part of the default build.
    add_custom_target(quantize_snapshot_test.sh ALL DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/quantize_snapshot_test.sh)
    # Copy the script into the runtime output directory whenever the source
    # copy changes. VERBATIM guarantees portable argument escaping.
    add_custom_command(OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/quantize_snapshot_test.sh
                       COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/quantize_snapshot_test.sh
                               ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/quantize_snapshot_test.sh
                       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/quantize_snapshot_test.sh
                       VERBATIM)
    # Build-order edge only (not a link): the script invokes the quantize
    # binary, so make sure it is built whenever the script target is.
    add_dependencies(quantize_snapshot_test.sh quantize)
endif()
34 changes: 20 additions & 14 deletions examples/quantize/quantize.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
#include "tts.h"
#include "args.h"
#include <stdio.h>
#include <thread>
#include "ggml.h"
#include <map>
#include <vector>

std::vector<ggml_type> valid_quantization_types = {
GGML_TYPE_F16,
GGML_TYPE_Q4_0,
GGML_TYPE_Q5_0,
GGML_TYPE_Q8_0,
#include "args.h"
#include "ggml.h"
#include "tts.h"

// Maps the user-facing --quantized-type (-qt) string to its ggml_type enum.
// Each canonical name (e.g. "Q4_0") also has a short alias (e.g. "Q4") that
// maps to the same enum value; lookup is exact (case-sensitive).
const std::map<std::string, ggml_type> valid_quantization_types = {
{"FP16", GGML_TYPE_F16},
{"F16", GGML_TYPE_F16},
{"Q4_0", GGML_TYPE_Q4_0},
{"Q4", GGML_TYPE_Q4_0},
{"Q5_0", GGML_TYPE_Q5_0},
{"Q5", GGML_TYPE_Q5_0},
{"Q8_0", GGML_TYPE_Q8_0},
{"Q8", GGML_TYPE_Q8_0},
};

int main(int argc, const char ** argv) {
int default_quantization = (int) GGML_TYPE_Q4_0;
int default_n_threads = std::max((int)std::thread::hardware_concurrency(), 1);
arg_list args;
args.add_argument(string_arg("--model-path", "(REQUIRED) The local path of the gguf model file for Parler TTS mini v1 to quantize.", "-mp", true));
args.add_argument(string_arg("--quantized-model-path", "(REQUIRED) The path to save the model in a quantized format.", "-qp", true));
args.add_argument(int_arg("--quantized-type", "(OPTIONAL) The ggml enum of the quantized type to convert compatible model tensors to. For more information see readme. Defaults to Q4_0 quantization (2).", "-qt", false, &default_quantization));
args.add_argument(string_arg("--quantized-type", "(OPTIONAL) The ggml enum of the quantized type to convert compatible model tensors to. For more information see readme. Defaults to Q4_0 quantization (2).", "-qt", false, "Q4_0"));
args.add_argument(int_arg("--n-threads", "(OPTIONAL) The number of cpu threads to run the quantization process with. Defaults to known hardware concurrency.", "-nt", false, &default_n_threads));
args.add_argument(bool_arg("--convert-dac-to-f16", "(OPTIONAL) Whether to convert the DAC audio decoder model to a 16 bit float.", "-df"));
args.add_argument(bool_arg("--quantize-output-heads", "(OPTIONAL) Whether to quantize the output heads. Defaults to false and is true when passed (does not accept a parameter).", "-qh"));
Expand All @@ -31,12 +36,13 @@ int main(int argc, const char ** argv) {
return 0;
}
args.validate();
enum ggml_type qtype = static_cast<ggml_type>(*args.get_int_param("--quantized-type"));
if (std::find(valid_quantization_types.begin(), valid_quantization_types.end(), qtype) == valid_quantization_types.end()) {
fprintf(stderr, "ERROR: %d is not a valid quantization type.\n", qtype);
std::string qtype = args.get_string_param("--quantized-type");
if (!valid_quantization_types.contains(qtype)) {
fprintf(stderr, "ERROR: %s is not a valid quantization type.\n",
qtype.c_str());
exit(1);
}
struct quantization_params * qp = new quantization_params((uint32_t) *args.get_int_param("--n-threads"), qtype);
struct quantization_params * qp = new quantization_params((uint32_t) *args.get_int_param("--n-threads"), valid_quantization_types.at(qtype));
qp->quantize_output_heads = args.get_bool_param("--quantize-output-heads");
qp->quantize_text_embeddings = args.get_bool_param("--quantize-text-embedding");
qp->quantize_cross_attn_kv = args.get_bool_param("--quantize-cross-attn-kv");
Expand Down
103 changes: 103 additions & 0 deletions examples/quantize/quantize_snapshot_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/bin/bash
set -e
# Performs snapshot testing against mmwillet2's version on Hugging Face

# To avoid redownloading models, cd to a persistent directory instead of build/bin
[ $# -eq 0 ] || { echo 'Usage: ./quantize_snapshot_test.sh'; exit 1; }
# The quantize binary is expected to sit in the same directory as this script.
quantize="$(dirname "$0")"/quantize
[ -x "$quantize" ] || { echo 'quantize binary in script directory is not executable'; exit 1; }

# gguf_py_venv is a sentinel symlink: it points at llama.cpp's gguf-py venv
# when the user supplies a path, or at /dev/null to record "no dumper" so the
# question is not asked again on later runs.
if [ ! -e gguf_py_venv ]; then
read -rp 'Path to llama.cpp (or blank): ' llamacpppath
if [ -z "$llamacpppath" ]; then
ln -s /dev/null gguf_py_venv
else
[ -f "$llamacpppath"/gguf-py/pyproject.toml ] || { echo 'Incompatible llama.cpp or path'; exit 1; }
pushd "$llamacpppath"/gguf-py
# Create the venv once and install gguf-py into it (editable install).
if [ ! -d venv ]; then
python3 -m venv venv
(
. venv/bin/activate
pip install -e .
)
fi
popd
ln -s "$llamacpppath"/gguf-py/venv gguf_py_venv
fi
fi
# Only a real venv directory enables gguf-dump; the /dev/null symlink leaves
# $dumper empty, which disables the dump-diff fallback inside q below.
if [ -d gguf_py_venv ]; then
. gguf_py_venv/bin/activate
dumper=gguf-dump
command -v "$dumper" >/dev/null 2>&1 || { echo 'Missing gguf-dump'; exit 1; }
fi

# Quantized outputs are written to $XDG_RUNTIME_DIR (typically a tmpfs).
if [ -z "$XDG_RUNTIME_DIR" ]; then
XDG_RUNTIME_DIR=/tmp # CI or macOS
else
# Parse the tmpfs size (in KiB) for $XDG_RUNTIME_DIR out of /proc/mounts;
# if it is under 4 GiB, remount it larger so big models fit.
size="$(sed -n "/^tmpfs ${XDG_RUNTIME_DIR//\//\\\/}/s/.\\+size=\\([0-9]\\+\\)k.\\+/\\1/p" /proc/mounts)"
if [ -n "$size" ] && [ "$size" -lt 4194304 ]; then
(
set -x
sudo mount -o remount,size=4G "$XDG_RUNTIME_DIR"
)
fi
fi

# Reference repos. Only the committed LFS pointer files are read (via
# git cat-file below), so the comparison works without fetching the blobs.
[ -d Dia_GGUF ] || git clone https://huggingface.co/mmwillet2/Dia_GGUF
[ -d Kokoro_GGUF ] || git clone https://huggingface.co/mmwillet2/Kokoro_GGUF
[ -d parler-tts-mini-v1-GGUF ] || git clone https://huggingface.co/ecyht2/parler-tts-mini-v1-GGUF

declare -a extra_args
# q TYPE REF: quantize $model with "-qt TYPE" plus $extra_args, then compare
# the output's sha256 against the hash recorded for REF in the model's git
# repo. On mismatch, and when a dumper is available, print a gguf-dump diff.
function q {
model_dir="$(dirname "$model")"
# Capture quantizer output; only replay it if the quantizer failed.
log="$(
set -x
"$quantize" -mp "$model" -qt "$1" -qp "$XDG_RUNTIME_DIR"/test.gguf "${extra_args[@]}" 2>&1
)" || echo -n "$log"

new_hash="$(sha256sum "$XDG_RUNTIME_DIR"/test.gguf)"
# Strip the trailing " filename" that sha256sum appends, keeping the hash.
new_hash="${new_hash% *}"
echo "$new_hash"' '"$XDG_RUNTIME_DIR"/test.gguf
# The committed file is a Git LFS pointer; its second line is
# "oid sha256:<hash>", i.e. the expected hash of REF.
old_hash="$(git -C "$model_dir" cat-file -p HEAD:"$2" | sed -n '2s/^oid sha256://p')"
echo "$old_hash"' '"$model_dir"/"$2"

# Dump the mismatching output before deleting it (non-final commands of an
# && list do not trip set -e when the guard tests are false).
[ "$new_hash" != "$old_hash" ] && [ -n "$dumper" ] && "$dumper" "$XDG_RUNTIME_DIR"/test.gguf > "$XDG_RUNTIME_DIR"/test.gguf.gguf-dump.txt 2>/dev/null
unlink "$XDG_RUNTIME_DIR"/test.gguf
if [ "$new_hash" != "$old_hash" ] && [ -n "$dumper" ]; then
# Cache the reference dump beside the model repo so it is produced once.
[ -f "$model_dir"/"$2".gguf-dump.txt ] || "$dumper" "$model_dir"/"$2" > "$model_dir"/"$2".gguf-dump.txt 2>/dev/null
# "|| :" keeps set -e from aborting when diff reports differences.
diff -U3 "$model_dir"/"$2".gguf-dump.txt "$XDG_RUNTIME_DIR"/test.gguf.gguf-dump.txt || :
fi
}

model=Dia_GGUF/Dia.gguf
extra_args=(-nt 3)
q F16 Dia_F16.gguf
q Q4 Dia_Q4.gguf
q Q5 Dia_Q5.gguf
q Q8 Dia_Q8.gguf
# -df: additionally convert the DAC audio decoder to F16.
extra_args=(-nt 3 -df)
q F16 Dia_F16_DAC_F16.gguf
q Q4 Dia_Q4_DAC_F16.gguf
q Q5 Dia_Q5_DAC_F16.gguf
q Q8 Dia_Q8_DAC_F16.gguf

model=Kokoro_GGUF/Kokoro_espeak.gguf
extra_args=(-nt 3 -nqf)
q F16 Kokoro_espeak_F16.gguf
q Q4 Kokoro_espeak_Q4.gguf
q Q5 Kokoro_espeak_Q5.gguf
q Q8 Kokoro_espeak_Q8.gguf
model=Kokoro_GGUF/Kokoro_no_espeak.gguf
q F16 Kokoro_no_espeak_F16.gguf
q Q4 Kokoro_no_espeak_Q4.gguf
q Q5 Kokoro_no_espeak_Q5.gguf
q Q8 Kokoro_no_espeak_Q8.gguf

model=parler-tts-mini-v1-GGUF/parler-tts-mini-v1-fp32.gguf
extra_args=(-nt 3)
q FP16 parler-tts-mini-v1-fp16.gguf
q Q4_0 parler-tts-mini-v1-Q4_0.gguf
q Q5_0 parler-tts-mini-v1-Q5_0.gguf
q Q8_0 parler-tts-mini-v1-Q8_0.gguf

# Remove the quantized output and any leftover dump from the final comparison.
rm "$XDG_RUNTIME_DIR"/test.gguf*