fix: digits pre-tokenizer returning empty array for text with no digits #51

Merged 1 commit on Jul 23, 2024
6 changes: 4 additions & 2 deletions docs/getting-started.md
@@ -66,11 +66,13 @@ Arguments:

can specify it here. This downloads any additional configuration or data needed for that task.
- `[options]` (optional): Additional options to customize the download process.
- `-cache_dir=<directory>`: Choose where to save the models. If you've got a preferred storage spot, mention it
- `--cache-dir=<directory>`: Choose where to save the models. If you've got a preferred storage spot, mention it
here. Otherwise, it goes to the default cache location. You can use the shorthand `-c` instead of `--cache-dir`.
- `--quantized=<true|false>`: Decide whether you want the quantized version of the model, which is smaller and
faster. The default is true, but if for some reason you prefer the full version, you can set this to false. You
can use the shorthand `-q` instead of `--quantized`. Example: `--quantized=false`, `-q false`.
- `--model-filename=<filename>`: Specify the exact model filename to download (without the `.onnx` suffix), e.g.
"model" or "model_quantized".

The `download` command will download the model weights and save them to the cache directory. The next time you use the
model, TransformersPHP will use the cached weights instead of downloading them again.
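
For illustration, a hypothetical invocation combining these options might look like the following. The model name and the `./vendor/bin/transformers` path are assumptions for the sketch, not taken from this diff:

```shell
# Download full-precision weights for a model into a local cache directory.
# "Xenova/bert-base-uncased" is a placeholder model identifier.
./vendor/bin/transformers download Xenova/bert-base-uncased \
    --cache-dir=./models \
    --quantized=false \
    --model-filename=model
```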
@@ -199,7 +201,7 @@

OpenMP is a set of compiler directives and library routines that enable parallel
programs. TransformersPHP uses OpenMP to enable multithreaded operations in the Tensors, which can improve performance
on multi-core systems. OpenMP is not required, but it can provide a significant performance boost for some operations.
Check out the [OpenMP website](https://www.openmp.org/) for more information on how to install and configure OpenMP on
your system.

Example: On Ubuntu, you can install OpenMP using the following command:
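
The install command itself is cut off by the diff view; as an assumption (not shown on this page), a typical Ubuntu setup looks like:

```shell
# GCC ships with the GNU OpenMP runtime (libgomp) by default; this
# installs the LLVM OpenMP runtime and headers.
sudo apt-get install libomp-dev
```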

10 changes: 9 additions & 1 deletion src/Commands/DownloadModelCommand.php
@@ -46,12 +46,20 @@ protected function configure(): void

$this->addOption(
'quantized',
null,
'q',
InputOption::VALUE_OPTIONAL,
'Whether to download the quantized version of the model.',
true
);

$this->addOption(
'model-filename',
null,
InputOption::VALUE_OPTIONAL,
'The filename of the exact model weights version to download.',
null
);

}

protected function execute(InputInterface $input, OutputInterface $output): int
7 changes: 6 additions & 1 deletion src/PreTokenizers/DigitsPreTokenizer.php
@@ -9,16 +9,21 @@ class DigitsPreTokenizer extends PreTokenizer
{

protected string $pattern;

public function __construct(protected array $config)
{
$individualDigits = $this->config['individual_digits'] ? '' : '+';

$digitPattern = "[^\\d]+|\\d$individualDigits";

$this->pattern = "/$digitPattern/u";

}

public function preTokenizeText(string|array $text, array $options): array
{
return preg_split($this->pattern, $text, -1, PREG_SPLIT_NO_EMPTY) ?? [];
preg_match_all($this->pattern, $text, $matches);

return $matches[0] ?? [];
}
}
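
As a standalone illustration of why the change matters (not part of the diff), the pattern below mirrors the pre-tokenizer's with `individual_digits` disabled. `preg_split` treats every match of the pattern as a delimiter, and since the pattern matches both digit and non-digit runs, the whole input is consumed as delimiters and nothing remains between them; `preg_match_all` returns the matched runs themselves:

```php
<?php
// Pattern equivalent to the pre-tokenizer's, with individual_digits = false:
// match runs of non-digits or runs of digits.
$pattern = '/[^\d]+|\d+/u';

$text = 'hello world';

// preg_split: the entire string matches the pattern, so it is consumed as a
// delimiter; only empty pieces remain, and PREG_SPLIT_NO_EMPTY discards
// them -- hence the empty array the issue title describes.
var_dump(preg_split($pattern, $text, -1, PREG_SPLIT_NO_EMPTY)); // []

// preg_match_all: return the matched runs themselves.
preg_match_all($pattern, $text, $matches);
var_dump($matches[0]); // ['hello world']

// With digits present, both digit and non-digit runs are kept as tokens:
preg_match_all($pattern, 'abc123', $m);
var_dump($m[0]); // ['abc', '123']
```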