6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
9
- #ifdef EXECUTORCH_FB_BUCK
10
- #include < TestResourceUtils/TestResourceUtils.h>
11
- #endif
12
9
#include < executorch/extension/llm/tokenizer/tiktoken.h>
13
10
#include < executorch/runtime/platform/runtime.h>
14
11
#include < gmock/gmock.h>
15
12
#include < gtest/gtest.h>
16
- #include < vector>
17
13
18
14
using namespace ::testing;
19
15
using ::executorch::extension::llm::Tiktoken;
@@ -49,15 +45,6 @@ static inline std::unique_ptr<std::vector<std::string>> _get_special_tokens() {
49
45
}
50
46
return special_tokens;
51
47
}
52
-
53
- static inline std::string _get_resource_path (const std::string& name) {
54
- #ifdef EXECUTORCH_FB_BUCK
55
- return facebook::xplat::testing::getPathForTestResource (" resources/" + name);
56
- #else
57
- return std::getenv (" RESOURCES_PATH" ) + std::string (" /" ) + name;
58
- #endif
59
- }
60
-
61
48
} // namespace
62
49
63
50
class TiktokenExtensionTest : public Test {
@@ -66,7 +53,8 @@ class TiktokenExtensionTest : public Test {
66
53
executorch::runtime::runtime_init ();
67
54
tokenizer_ = std::make_unique<Tiktoken>(
68
55
_get_special_tokens (), kBOSTokenIndex , kEOSTokenIndex );
69
- modelPath_ = _get_resource_path (" test_tiktoken_tokenizer.model" );
56
+ modelPath_ = std::getenv (" RESOURCES_PATH" ) +
57
+ std::string (" /test_tiktoken_tokenizer.model" );
70
58
}
71
59
72
60
std::unique_ptr<Tokenizer> tokenizer_;
@@ -84,15 +72,15 @@ TEST_F(TiktokenExtensionTest, DecodeWithoutLoadFails) {
84
72
}
85
73
86
74
TEST_F (TiktokenExtensionTest, TokenizerVocabSizeIsExpected) {
87
- Error res = tokenizer_->load (modelPath_. c_str () );
75
+ Error res = tokenizer_->load (modelPath_);
88
76
EXPECT_EQ (res, Error::Ok);
89
77
EXPECT_EQ (tokenizer_->vocab_size (), 128256 );
90
78
EXPECT_EQ (tokenizer_->bos_tok (), 128000 );
91
79
EXPECT_EQ (tokenizer_->eos_tok (), 128001 );
92
80
}
93
81
94
82
TEST_F (TiktokenExtensionTest, TokenizerEncodeCorrectly) {
95
- Error res = tokenizer_->load (modelPath_. c_str () );
83
+ Error res = tokenizer_->load (modelPath_);
96
84
EXPECT_EQ (res, Error::Ok);
97
85
Result<std::vector<uint64_t >> out = tokenizer_->encode (" hello world" , 1 , 0 );
98
86
EXPECT_EQ (out.error (), Error::Ok);
@@ -103,7 +91,7 @@ TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
103
91
}
104
92
105
93
TEST_F (TiktokenExtensionTest, TokenizerDecodeCorrectly) {
106
- Error res = tokenizer_->load (modelPath_. c_str () );
94
+ Error res = tokenizer_->load (modelPath_);
107
95
EXPECT_EQ (res, Error::Ok);
108
96
std::vector<std::string> expected = {" <|begin_of_text|>" , " hello" , " world" };
109
97
std::vector<uint64_t > tokens = {128000 , 15339 , 1917 };
@@ -115,7 +103,7 @@ TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
115
103
}
116
104
117
105
TEST_F (TiktokenExtensionTest, TokenizerDecodeOutOfRangeFails) {
118
- Error res = tokenizer_->load (modelPath_. c_str () );
106
+ Error res = tokenizer_->load (modelPath_);
119
107
EXPECT_EQ (res, Error::Ok);
120
108
// The vocab size is 128256; add 256 just so the token is out of vocab
121
109
// range.
@@ -160,31 +148,33 @@ TEST_F(TiktokenExtensionTest, LoadWithInvalidPath) {
160
148
}
161
149
162
150
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithInvalidRank) {
163
- auto invalidModelPath =
164
- _get_resource_path ( " test_tiktoken_invalid_rank.model" );
165
- Error res = tokenizer_->load (invalidModelPath. c_str () );
151
+ auto invalidModelPath = std::getenv ( " RESOURCES_PATH " ) +
152
+ std::string ( " / test_tiktoken_invalid_rank.model" );
153
+ Error res = tokenizer_->load (invalidModelPath);
166
154
167
155
EXPECT_EQ (res, Error::InvalidArgument);
168
156
}
169
157
170
158
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithInvalidBase64) {
171
- auto invalidModelPath =
172
- _get_resource_path ( " test_tiktoken_invalid_base64.model" );
173
- Error res = tokenizer_->load (invalidModelPath. c_str () );
159
+ auto invalidModelPath = std::getenv ( " RESOURCES_PATH " ) +
160
+ std::string ( " / test_tiktoken_invalid_base64.model" );
161
+ Error res = tokenizer_->load (invalidModelPath);
174
162
175
163
EXPECT_EQ (res, Error::InvalidArgument);
176
164
}
177
165
178
166
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithNoSpace) {
179
- auto invalidModelPath = _get_resource_path (" test_tiktoken_no_space.model" );
180
- Error res = tokenizer_->load (invalidModelPath.c_str ());
167
+ auto invalidModelPath = std::getenv (" RESOURCES_PATH" ) +
168
+ std::string (" /test_tiktoken_no_space.model" );
169
+ Error res = tokenizer_->load (invalidModelPath);
181
170
182
171
EXPECT_EQ (res, Error::InvalidArgument);
183
172
}
184
173
185
174
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithBPEFile) {
186
- auto invalidModelPath = _get_resource_path (" test_bpe_tokenizer.bin" );
187
- Error res = tokenizer_->load (invalidModelPath.c_str ());
175
+ auto invalidModelPath =
176
+ std::getenv (" RESOURCES_PATH" ) + std::string (" /test_bpe_tokenizer.bin" );
177
+ Error res = tokenizer_->load (invalidModelPath);
188
178
189
179
EXPECT_EQ (res, Error::InvalidArgument);
190
180
}
0 commit comments