6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
9
- #ifdef EXECUTORCH_FB_BUCK
10
- #include < TestResourceUtils/TestResourceUtils.h>
11
- #endif
12
9
#include < executorch/extension/llm/tokenizer/tiktoken.h>
13
10
#include < executorch/runtime/platform/runtime.h>
14
11
#include < gmock/gmock.h>
15
12
#include < gtest/gtest.h>
16
- #include < vector>
17
13
18
14
using namespace ::testing;
19
15
using ::executorch::extension::llm::Tiktoken;
@@ -49,15 +45,6 @@ static inline std::unique_ptr<std::vector<std::string>> _get_special_tokens() {
49
45
}
50
46
return special_tokens;
51
47
}
52
-
53
- static inline std::string _get_resource_path (const std::string& name) {
54
- #ifdef EXECUTORCH_FB_BUCK
55
- return facebook::xplat::testing::getPathForTestResource (" resources/" + name);
56
- #else
57
- return std::getenv (" RESOURCES_PATH" ) + std::string (" /" ) + name;
58
- #endif
59
- }
60
-
61
48
} // namespace
62
49
63
50
class TiktokenExtensionTest : public Test {
@@ -66,7 +53,7 @@ class TiktokenExtensionTest : public Test {
66
53
executorch::runtime::runtime_init ();
67
54
tokenizer_ = std::make_unique<Tiktoken>(
68
55
_get_special_tokens (), kBOSTokenIndex , kEOSTokenIndex );
69
- modelPath_ = _get_resource_path ( " test_tiktoken_tokenizer.model " );
56
+ modelPath_ = std::getenv ( " TEST_TIKTOKEN_TOKENIZER " );
70
57
}
71
58
72
59
std::unique_ptr<Tokenizer> tokenizer_;
@@ -84,15 +71,15 @@ TEST_F(TiktokenExtensionTest, DecodeWithoutLoadFails) {
84
71
}
85
72
86
73
TEST_F (TiktokenExtensionTest, TokenizerVocabSizeIsExpected) {
87
- Error res = tokenizer_->load (modelPath_. c_str () );
74
+ Error res = tokenizer_->load (modelPath_);
88
75
EXPECT_EQ (res, Error::Ok);
89
76
EXPECT_EQ (tokenizer_->vocab_size (), 128256 );
90
77
EXPECT_EQ (tokenizer_->bos_tok (), 128000 );
91
78
EXPECT_EQ (tokenizer_->eos_tok (), 128001 );
92
79
}
93
80
94
81
TEST_F (TiktokenExtensionTest, TokenizerEncodeCorrectly) {
95
- Error res = tokenizer_->load (modelPath_. c_str () );
82
+ Error res = tokenizer_->load (modelPath_);
96
83
EXPECT_EQ (res, Error::Ok);
97
84
Result<std::vector<uint64_t >> out = tokenizer_->encode (" hello world" , 1 , 0 );
98
85
EXPECT_EQ (out.error (), Error::Ok);
@@ -103,7 +90,7 @@ TEST_F(TiktokenExtensionTest, TokenizerEncodeCorrectly) {
103
90
}
104
91
105
92
TEST_F (TiktokenExtensionTest, TokenizerDecodeCorrectly) {
106
- Error res = tokenizer_->load (modelPath_. c_str () );
93
+ Error res = tokenizer_->load (modelPath_);
107
94
EXPECT_EQ (res, Error::Ok);
108
95
std::vector<std::string> expected = {" <|begin_of_text|>" , " hello" , " world" };
109
96
std::vector<uint64_t > tokens = {128000 , 15339 , 1917 };
@@ -115,7 +102,7 @@ TEST_F(TiktokenExtensionTest, TokenizerDecodeCorrectly) {
115
102
}
116
103
117
104
TEST_F (TiktokenExtensionTest, TokenizerDecodeOutOfRangeFails) {
118
- Error res = tokenizer_->load (modelPath_. c_str () );
105
+ Error res = tokenizer_->load (modelPath_);
119
106
EXPECT_EQ (res, Error::Ok);
120
107
// The vocab size is 128256, adds 256 just so the token is out of vocab
121
108
// range.
@@ -160,31 +147,29 @@ TEST_F(TiktokenExtensionTest, LoadWithInvalidPath) {
160
147
}
161
148
162
149
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithInvalidRank) {
163
- auto invalidModelPath =
164
- _get_resource_path (" test_tiktoken_invalid_rank.model" );
165
- Error res = tokenizer_->load (invalidModelPath.c_str ());
150
+ auto invalidModelPath = std::getenv (" TEST_TIKTOKEN_INVALID_RANK" );
151
+ Error res = tokenizer_->load (invalidModelPath);
166
152
167
153
EXPECT_EQ (res, Error::InvalidArgument);
168
154
}
169
155
170
156
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithInvalidBase64) {
171
- auto invalidModelPath =
172
- _get_resource_path (" test_tiktoken_invalid_base64.model" );
173
- Error res = tokenizer_->load (invalidModelPath.c_str ());
157
+ auto invalidModelPath = std::getenv (" TEST_TIKTOKEN_INVALID_BASE64" );
158
+ Error res = tokenizer_->load (invalidModelPath);
174
159
175
160
EXPECT_EQ (res, Error::InvalidArgument);
176
161
}
177
162
178
163
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithNoSpace) {
179
- auto invalidModelPath = _get_resource_path ( " test_tiktoken_no_space.model " );
180
- Error res = tokenizer_->load (invalidModelPath. c_str () );
164
+ auto invalidModelPath = std::getenv ( " TEST_TIKTOKEN_NO_SPACE " );
165
+ Error res = tokenizer_->load (invalidModelPath);
181
166
182
167
EXPECT_EQ (res, Error::InvalidArgument);
183
168
}
184
169
185
170
TEST_F (TiktokenExtensionTest, LoadTiktokenFileWithBPEFile) {
186
- auto invalidModelPath = _get_resource_path ( " test_bpe_tokenizer.bin " );
187
- Error res = tokenizer_->load (invalidModelPath. c_str () );
171
+ auto invalidModelPath = std::getenv ( " TEST_BPE_TOKENIZER " );
172
+ Error res = tokenizer_->load (invalidModelPath);
188
173
189
174
EXPECT_EQ (res, Error::InvalidArgument);
190
175
}
0 commit comments