Consolidate TokenIndex definition #84

Merged: 1 commit, merged on Jun 17, 2025
5 changes: 0 additions & 5 deletions include/pytorch/tokenizers/llama2c_tokenizer.h
@@ -12,11 +12,6 @@
 
 namespace tokenizers {
 
-struct TokenIndex {
-  const char* str;
-  int32_t id;
-};
-
 // A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
 // won't work with this class, it needs to go through tokenizer.py first.
 class Llama2cTokenizer : public Tokenizer {

5 changes: 0 additions & 5 deletions include/pytorch/tokenizers/sentencepiece.h
@@ -16,11 +16,6 @@
 #include "sentencepiece_processor.h"
 namespace tokenizers {
 
-struct TokenIndex {
-  const char* str;
-  int32_t id;
-};
-
 class SPTokenizer : public Tokenizer {
  public:
  explicit SPTokenizer();

5 changes: 5 additions & 0 deletions include/pytorch/tokenizers/tokenizer.h
@@ -20,6 +20,11 @@
 
 namespace tokenizers {
 
+struct TokenIndex {
+  const char* str;
+  int32_t id;
+};
+
 class Tokenizer {
  public:
  explicit Tokenizer() {}
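
With TokenIndex defined once in tokenizer.h, both llama2c_tokenizer.h and sentencepiece.h now pick up the same declaration instead of each carrying a copy. As a minimal sketch of how such a struct is commonly used in llama2.c-style tokenizers (the lookup helper and vocabulary layout below are illustrative assumptions, not code from this PR): keep one entry per vocabulary string, sort by the string, and binary-search to map a token piece to its id.

// sketch.cpp - illustrative only; lookup_token is a hypothetical helper.
#include <pytorch/tokenizers/tokenizer.h>  // shared TokenIndex after this PR

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

namespace tokenizers {

// Map a token piece to its id via binary search. `sorted` must be ordered
// by strcmp on `str`. Returns -1 when the piece is not in the vocabulary.
inline int32_t lookup_token(const std::vector<TokenIndex>& sorted,
                            const std::string& piece) {
  auto it = std::lower_bound(
      sorted.begin(), sorted.end(), piece,
      [](const TokenIndex& entry, const std::string& value) {
        return std::strcmp(entry.str, value.c_str()) < 0;
      });
  if (it != sorted.end() && piece == it->str) {
    return it->id;
  }
  return -1;
}

}  // namespace tokenizers

Sorting the vocabulary once (std::sort with the same strcmp ordering) keeps each lookup at O(log V), and any tokenizer that includes tokenizer.h can reuse the same entry type without redeclaring it.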