@@ -1322,11 +1322,13 @@ struct llama_vocab::impl {
1322
1322
char * text,
1323
1323
int32_t text_len_max,
1324
1324
bool remove_special,
1325
- bool unparse_special) const ;
1325
+ bool unparse_special,
1326
+ bool remove_space_prefix = true ) const ;
1326
1327
1327
1328
std::string detokenize (
1328
1329
const std::vector<llama_token> & tokens,
1329
- bool special) const ;
1330
+ bool special,
1331
+ bool remove_space_prefix = true ) const ;
1330
1332
1331
1333
void print_info () const ;
1332
1334
@@ -2581,7 +2583,8 @@ int32_t llama_vocab::impl::detokenize(
2581
2583
char * text,
2582
2584
int32_t text_len_max,
2583
2585
bool remove_special,
2584
- bool unparse_special) const {
2586
+ bool unparse_special,
2587
+ bool remove_space_prefix) const {
2585
2588
if (type == LLAMA_VOCAB_TYPE_NONE) {
2586
2589
return 0 ;
2587
2590
}
@@ -2592,7 +2595,7 @@ int32_t llama_vocab::impl::detokenize(
2592
2595
int32_t total = 0 ;
2593
2596
2594
2597
// remove the leading space
2595
- bool remove_space = add_space_prefix;
2598
+ bool remove_space = add_space_prefix && remove_space_prefix ;
2596
2599
2597
2600
if (remove_special && add_bos) {
2598
2601
if (n_tokens > 0 && tokens[0 ] == special_bos_id) {
@@ -2991,17 +2994,18 @@ int32_t llama_vocab::detokenize(
2991
2994
char * text,
2992
2995
int32_t text_len_max,
2993
2996
bool remove_special,
2994
- bool unparse_special) const {
2995
- return pimpl->detokenize (tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
2997
+ bool unparse_special,
2998
+ bool remove_space_prefix) const {
2999
+ return pimpl->detokenize (tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
2996
3000
}
2997
3001
2998
- std::string llama_vocab::detokenize (const std::vector<llama_token> & tokens, bool special) const {
3002
+ std::string llama_vocab::detokenize (const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix ) const {
2999
3003
std::string text;
3000
3004
text.resize (std::max (text.capacity (), tokens.size ()));
3001
- int32_t n_chars = detokenize (tokens.data (), (int32_t )tokens.size (), &text[0 ], (int32_t )text.size (), false , special);
3005
+ int32_t n_chars = detokenize (tokens.data (), (int32_t )tokens.size (), &text[0 ], (int32_t )text.size (), false , special, remove_space_prefix );
3002
3006
if (n_chars < 0 ) {
3003
3007
text.resize (-n_chars);
3004
- n_chars = detokenize (tokens.data (), (int32_t )tokens.size (), &text[0 ], (int32_t )text.size (), false , special);
3008
+ n_chars = detokenize (tokens.data (), (int32_t )tokens.size (), &text[0 ], (int32_t )text.size (), false , special, remove_space_prefix );
3005
3009
GGML_ASSERT (n_chars <= (int32_t )text.size ()); // whitespace trimming is performed after per-token detokenization
3006
3010
}
3007
3011
@@ -3246,7 +3250,8 @@ int32_t llama_detokenize(
3246
3250
char * text,
3247
3251
int32_t text_len_max,
3248
3252
bool remove_special,
3249
- bool unparse_special) {
3250
- return vocab->detokenize (tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
3253
+ bool unparse_special,
3254
+ bool remove_space_prefix) {
3255
+ return vocab->detokenize (tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
3251
3256
}
3252
3257
0 commit comments