6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
9
- #include < executorch/examples/models/llama2/tokenizer/tokenizer .h>
9
+ #include < executorch/examples/models/llama2/tokenizer/bpe_tokenizer .h>
10
10
11
11
#include < string>
12
12
@@ -23,11 +23,11 @@ static int compare_tokens(const void* a, const void* b) {
23
23
return strcmp (((TokenIndex*)a)->str , ((TokenIndex*)b)->str );
24
24
}
25
25
26
- Tokenizer::Tokenizer ( int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok)
27
- : initialized_( false ) ,
28
- vocab_size_ (vocab_size) ,
29
- bos_tok_(bos_tok),
30
- eos_tok_( eos_tok),
26
+ BPETokenizer::BPETokenizer (
27
+ int32_t vocab_size ,
28
+ uint64_t bos_tok ,
29
+ uint64_t eos_tok)
30
+ : Tokenizer(vocab_size, bos_tok, eos_tok),
31
31
vocab_ (std::make_unique<char *[]>(vocab_size)),
32
32
vocab_scores_(std::make_unique<float []>(vocab_size)),
33
33
sorted_vocab_(std::make_unique<TokenIndex[]>(vocab_size)) {
@@ -47,7 +47,7 @@ Tokenizer::Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok)
47
47
* @param tokenizer_path The path to the tokenizer file.
48
48
* @return Error
49
49
*/
50
- Error Tokenizer ::load (const std::string& tokenizer_path) {
50
+ Error BPETokenizer ::load (const std::string& tokenizer_path) {
51
51
if (initialized_) {
52
52
ET_LOG (Info, " Tokenizer already initialized" );
53
53
return Error::Ok;
@@ -131,7 +131,7 @@ Error Tokenizer::load(const std::string& tokenizer_path) {
131
131
return Error::Ok;
132
132
}
133
133
134
- Tokenizer ::~Tokenizer () {
134
+ BPETokenizer ::~BPETokenizer () {
135
135
for (int i = 0 ; i < vocab_size_; i++) {
136
136
delete[] vocab_[i];
137
137
}
@@ -145,7 +145,7 @@ Tokenizer::~Tokenizer() {
145
145
* @return Result<std::string> A pointer to the string representation of the
146
146
* token.
147
147
*/
148
- Result<std::string> Tokenizer ::decode (uint64_t prev_token, uint64_t token) {
148
+ Result<std::string> BPETokenizer ::decode (uint64_t prev_token, uint64_t token) {
149
149
if (!initialized_) {
150
150
ET_LOG (Error, " Tokenizer not initialized" );
151
151
return Error::NotSupported;
@@ -187,7 +187,7 @@ str_lookup(const char* str, TokenIndex* sorted_vocab, int32_t vocab_size) {
187
187
* @return Result<std::vector<uint64_t>>
188
188
*/
189
189
Result<std::vector<uint64_t >>
190
- Tokenizer ::encode (const std::string& text, int8_t bos, int8_t eos) {
190
+ BPETokenizer ::encode (const std::string& text, int8_t bos, int8_t eos) {
191
191
if (!initialized_) {
192
192
ET_LOG (Error, " Tokenizer not initialized" );
193
193
return Error::NotSupported;
0 commit comments