From 9ce3d499dcb97b5ea910cb131679093e5c122491 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Sun, 16 Mar 2025 18:39:05 -0700
Subject: [PATCH] Migrate users of llm tokenizer to use pytorch-labs/tokenizers
 (#9114)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/9114

Finally migrate llm tokenizer usages to pytorch-labs/tokenizers.

Reviewed By: iseeyuan

Differential Revision: D70932091
---
 .ci/scripts/utils.sh                          |  8 ++++
 examples/mediatek/CMakeLists.txt              |  8 ++--
 .../mtk_llama_executor_runner.cpp             |  8 ++--
 .../executor_runner/mtk_llama_runner.h        |  6 +--
 examples/models/llama/CMakeLists.txt          |  6 ++-
 examples/models/llama/runner/CMakeLists.txt   |  9 +++--
 examples/models/llama/runner/runner.cpp       | 21 +++++++----
 examples/models/llama/runner/runner.h         |  4 +-
 examples/models/llama/runner/targets.bzl      |  2 +-
 .../models/llama/tokenizer/llama_tiktoken.cpp | 37 +++++++++----------
 .../models/llama/tokenizer/llama_tiktoken.h   |  6 ++-
 examples/models/llama/tokenizer/targets.bzl   |  3 +-
 .../llama/tokenizer/test/test_tiktoken.cpp    |  6 +--
 examples/models/llava/runner/CMakeLists.txt   |  5 ++-
 examples/models/llava/runner/llava_runner.cpp |  6 +--
 examples/models/llava/runner/targets.bzl      |  2 +-
 examples/models/phi-3-mini/CMakeLists.txt     |  3 +-
 examples/models/phi-3-mini/runner.cpp         | 12 +++---
 examples/models/phi-3-mini/runner.h           |  4 +-
 .../oss_scripts/llama/runner/runner.cpp       | 15 ++++----
 .../oss_scripts/llama/runner/runner.h         |  4 +-
 .../qualcomm/oss_scripts/llama/targets.bzl    |  2 +-
 .../qaihub_scripts/llama/CMakeLists.txt       |  6 +--
 .../qaihub_scripts/llama/runner/runner.cpp    |  8 ++--
 .../qaihub_scripts/llama/runner/runner.h      |  4 +-
 extension/llm/runner/CMakeLists.txt           |  2 +-
 extension/llm/runner/multimodal_runner.h      |  4 +-
 extension/llm/runner/targets.bzl              |  4 +-
 extension/llm/runner/text_prefiller.h         |  2 -
 extension/llm/runner/text_token_generator.h   |  9 +++--
 extension/llm/runner/util.h                   | 24 ++++++++++++
 extension/llm/tokenizers                      |  2 +-
 install_executorch.py                         | 18 +++++++++
 .../xplat/executorch/build/env_interface.bzl  | 21 ++++++++---
 .../executorch/build/runtime_wrapper.bzl      |  3 ++
 test/build_size_test.sh                       |  2 +-
 36 files changed, 183 insertions(+), 103 deletions(-)

diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
index 8d99a0022cf..c710196d896 100644
--- a/.ci/scripts/utils.sh
+++ b/.ci/scripts/utils.sh
@@ -20,6 +20,14 @@ clean_executorch_install_folders() {
   ./install_executorch.sh --clean
 }
 
+update_tokenizers_git_submodule() {
+  echo "Updating tokenizers git submodule..."
+  git submodule update --init
+  pushd extension/llm/tokenizers
+  git submodule update --init
+  popd
+}
+
 install_executorch() {
   which pip
   # Install executorch, this assumes that Executorch is checked out in the
diff --git a/examples/mediatek/CMakeLists.txt b/examples/mediatek/CMakeLists.txt
index d8e2a5bf667..111c741c6aa 100644
--- a/examples/mediatek/CMakeLists.txt
+++ b/examples/mediatek/CMakeLists.txt
@@ -137,18 +137,18 @@ if(${ANDROID})
   set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
 
   # Build tokenizers
-  set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizer)
+  set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizers)
   add_library(tokenizer STATIC)
   target_include_directories(
     tokenizer PUBLIC ${_common_include_directories} ${THIRD_PARTY_ABSL_DIR}
-                     ${THIRD_PARTY_RE2_DIR}
+                     ${THIRD_PARTY_RE2_DIR} ${LLAMA2_TOKENIZER_DIR}/include
   )
   target_link_libraries(tokenizer PRIVATE re2::re2)
   target_sources(
     tokenizer
-    PRIVATE ${LLAMA2_TOKENIZER_DIR}/tiktoken.cpp
-            ${LLAMA2_TOKENIZER_DIR}/bpe_tokenizer.cpp
+    PRIVATE ${LLAMA2_TOKENIZER_DIR}/src/tiktoken.cpp
+            ${LLAMA2_TOKENIZER_DIR}/src/llama2c_tokenizer.cpp
             ${CMAKE_CURRENT_SOURCE_DIR}/../models/llama/tokenizer/llama_tiktoken.cpp
   )
diff --git a/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp b/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
index 4fba0e20a81..73fc8a0fb89 100644
--- a/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
+++ b/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
@@ -68,8 +68,8 @@
 #include "llama_runner/llm_helper/include/llm_types.h"
 
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/tokenizer.h>
 
 // Llama model options
 DEFINE_uint64(
@@ -140,10 +140,10 @@ using example::utils::read_file;
 using example::utils::split;
 using example::utils::Timer;
 using example::utils::to_string;
-using executorch::extension::llm::BPETokenizer;
-using executorch::extension::llm::Tokenizer;
 using executorch::runtime::Error;
 using executorch::runtime::Result;
+using tokenizers::Llama2cTokenizer;
+using tokenizers::Tokenizer;
 
 LlamaModelOptions get_model_options() {
   LlamaModelOptions options = {
diff --git a/examples/mediatek/executor_runner/mtk_llama_runner.h b/examples/mediatek/executor_runner/mtk_llama_runner.h
index 4c7b35d1a88..0f76f610a7e 100644
--- a/examples/mediatek/executor_runner/mtk_llama_runner.h
+++ b/examples/mediatek/executor_runner/mtk_llama_runner.h
@@ -14,8 +14,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/tiktoken.h>
 #include <...>
 #include <...>
 #include <...>
@@ -28,9 +28,9 @@ using Stats = ::executorch::llm::Stats;
 using example::LlamaModelOptions;
 using example::LlamaModelPaths;
 using example::LlamaRuntime;
-using executorch::extension::llm::Tokenizer;
 using executorch::runtime::Error;
 using executorch::runtime::Result;
+using tokenizers::Tokenizer;
 
 class MTKLlamaRunner : public executorch::extension::llm::IRunner {
  public:
diff --git a/examples/models/llama/CMakeLists.txt b/examples/models/llama/CMakeLists.txt
index 96ff28d8f49..50bb64a159b 100644
--- a/examples/models/llama/CMakeLists.txt
+++ b/examples/models/llama/CMakeLists.txt
@@ -209,7 +209,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
   endif()
 endif()
 
-target_include_directories(llama_main PUBLIC ${_common_include_directories})
+target_include_directories(
+  llama_main
+  PUBLIC ${_common_include_directories}
+         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+)
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
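Note on the API being adopted: every call site changed below follows the same pattern. The sketch here is assembled only from calls visible in this patch (load/encode/decode, tokenizers::Error, tokenizers::Result); the tokenizer artifact path and the printing loop are illustrative, not part of the change.

// Sketch (not part of the patch): the pytorch-labs/tokenizers call pattern.
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

#include <pytorch/tokenizers/llama2c_tokenizer.h>

int main() {
  std::unique_ptr<tokenizers::Tokenizer> tokenizer =
      std::make_unique<tokenizers::Llama2cTokenizer>();

  // load() returns tokenizers::Error rather than executorch::runtime::Error.
  if (tokenizer->load("tokenizer.bin") != tokenizers::Error::Ok) {
    fprintf(stderr, "failed to load tokenizer\n");
    return 1;
  }

  // encode() returns tokenizers::Result<std::vector<uint64_t>>.
  tokenizers::Result<std::vector<uint64_t>> encoded =
      tokenizer->encode("hello world", /*bos=*/1, /*eos=*/0);
  if (!encoded.ok()) {
    return 1;
  }

  // decode() turns a (prev_token, cur_token) pair back into a string piece.
  const std::vector<uint64_t>& tokens = encoded.get();
  for (size_t i = 1; i < tokens.size(); ++i) {
    auto piece = tokenizer->decode(tokens[i - 1], tokens[i]);
    if (piece.ok()) {
      printf("%s", piece.get().c_str());
    }
  }
  printf("\n");
  return 0;
}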
diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt
index 919bc356551..f423a5a20f1 100644
--- a/examples/models/llama/runner/CMakeLists.txt
+++ b/examples/models/llama/runner/CMakeLists.txt
@@ -43,7 +43,7 @@ target_include_directories(
 
 list(
   APPEND _llama_runner__srcs
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp
 )
 list(APPEND _llama_runner__srcs
      ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
@@ -83,7 +83,10 @@ target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
 
 target_include_directories(
   llama_runner
-  INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
-            ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  INTERFACE ${_common_include_directories}
+)
+target_include_directories(
+  llama_runner
+  PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index f6820b8701e..e0a317aaff3 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -16,7 +16,7 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -78,17 +78,21 @@ Error Runner::load() {
   // load tokenizer. Assuming tiktoken is the default tokenizer
   tokenizer_ = nullptr;
   tokenizer_ = get_tiktoken_for_llama();
-  Error err = tokenizer_->load(tokenizer_path_);
+  ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
   // Rely on tiktoken to throw error if the artifact is incompatible. Then we
   // fallback to BPE tokenizer.
-  if (err == Error::InvalidArgument) {
+  if (err != ::tokenizers::Error::Ok) {
     ET_LOG(
         Info,
         "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
         tokenizer_path_.c_str());
     tokenizer_.reset();
-    tokenizer_ = std::make_unique<llm::BPETokenizer>();
-    tokenizer_->load(tokenizer_path_);
+    tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
+    err = tokenizer_->load(tokenizer_path_);
+    ET_CHECK_TK_OK_OR_RETURN_ERROR(
+        err,
+        "Failed to load %s as a llama2.c tokenizer artifact",
+        tokenizer_path_.c_str());
   }
 
   ET_LOG(Info, "Reading metadata from model");
@@ -201,12 +205,12 @@ Error Runner::generate(
       ? seq_len
       : metadata_.at(kMaxSeqLen);
 
-  Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
+  ::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
       prompt,
       /* bos */ 0,
       /* eos */ 0);
 
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(), "Failed to encode prompt %s", prompt.c_str());
 
   // encode the (string) prompt into tokens sequence
@@ -242,7 +246,8 @@ Error Runner::generate(
   uint64_t cur_token = prefill_res.get();
 
   // print the first token from prefill. No prev_token so use cur_token for it.
-  wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
+  wrapped_callback(
+      ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
   RUNNER_ET_LOG(
       warmup,
       "RSS after prompt prefill: %f MiB (0 if unsupported)",
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
index 1acce2f8e92..509fe234027 100644
--- a/examples/models/llama/runner/runner.h
+++ b/examples/models/llama/runner/runner.h
@@ -23,8 +23,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace example {
 
@@ -58,7 +58,7 @@ class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
   // model
   std::unique_ptr<::executorch::extension::Module> module_;
   std::string tokenizer_path_;
-  std::unique_ptr<::executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
   std::unordered_map<std::string, int64_t> metadata_;
   std::unique_ptr<::executorch::extension::llm::TextDecoderRunner>
       text_decoder_runner_;
diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl
index 37827bb78a5..60fc1f2b74d 100644
--- a/examples/models/llama/runner/targets.bzl
+++ b/examples/models/llama/runner/targets.bzl
@@ -48,7 +48,7 @@ def define_common_targets():
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
                 "//executorch/examples/models/llama/tokenizer:tiktoken",
-                "//executorch/extension/llm/tokenizer:bpe_tokenizer",
+                "//pytorch/tokenizers:llama2c_tokenizer",
             ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
diff --git a/examples/models/llama/tokenizer/llama_tiktoken.cpp b/examples/models/llama/tokenizer/llama_tiktoken.cpp
index 74eacc1b5f0..f595de3c4e7 100644
--- a/examples/models/llama/tokenizer/llama_tiktoken.cpp
+++ b/examples/models/llama/tokenizer/llama_tiktoken.cpp
@@ -10,7 +10,7 @@
 
 namespace example {
 
-using ::executorch::extension::llm::Tiktoken;
+using ::tokenizers::Tiktoken;
 
 namespace {
 static constexpr int32_t kSpecialTokensSize = 256;
@@ -42,8 +42,23 @@ _get_default_special_tokens() {
   return special_tokens;
 }
 
-static inline std::unique_ptr<std::vector<std::string>>
-_get_multimodal_special_tokens() {
+std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
+  switch (version) {
+    case Version::Multimodal:
+      return get_multimodal_special_tokens();
+    default:
+      return _get_default_special_tokens();
+  }
+}
+
+} // namespace
+
+std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
+  return std::make_unique<Tiktoken>(
+      _get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
+}
+
+std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens() {
   auto special_tokens =
       std::make_unique<std::vector<std::string>>(std::vector<std::string>{
           "<|begin_of_text|>",
@@ -72,20 +87,4 @@ _get_multimodal_special_tokens() {
   return special_tokens;
 }
 
-std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
-  switch (version) {
-    case Version::Multimodal:
-      return _get_multimodal_special_tokens();
-    default:
-      return _get_default_special_tokens();
-  }
-}
-
-} // namespace
-
-std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
-  return std::make_unique<Tiktoken>(
-      _get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
-}
-
 } // namespace example
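The reshuffle above keeps get_tiktoken_for_llama() as the factory while promoting get_multimodal_special_tokens() to a public helper, so callers (such as the test updated below) can construct a Tiktoken directly. A usage sketch against those two signatures; the model path is illustrative:

// Sketch (not part of the patch): two ways to obtain a llama Tiktoken
// after this change. "tiktoken.model" is an illustrative path.
#include <memory>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <pytorch/tokenizers/tiktoken.h>

void load_llama_tokenizers() {
  // Factory: Version::Default selects the text-only special tokens,
  // Version::Multimodal the multimodal set.
  std::unique_ptr<::tokenizers::Tiktoken> via_factory =
      example::get_tiktoken_for_llama(example::Version::Multimodal);
  if (via_factory->load("tiktoken.model") != ::tokenizers::Error::Ok) {
    return;
  }

  // Direct construction from the now-public special-token list, mirroring
  // the updated test below (BOS token index 0, EOS token index 1).
  auto direct = std::make_unique<::tokenizers::Tiktoken>(
      example::get_multimodal_special_tokens(), 0, 1);
  if (direct->load("tiktoken.model") != ::tokenizers::Error::Ok) {
    return;
  }
}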
diff --git a/examples/models/llama/tokenizer/llama_tiktoken.h b/examples/models/llama/tokenizer/llama_tiktoken.h
index 6baa3f49cc6..a7f65eca29e 100644
--- a/examples/models/llama/tokenizer/llama_tiktoken.h
+++ b/examples/models/llama/tokenizer/llama_tiktoken.h
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
+#include <pytorch/tokenizers/tiktoken.h>
 
 namespace example {
 
@@ -17,7 +17,9 @@ enum class Version {
   Multimodal,
 };
 
-std::unique_ptr<::executorch::extension::llm::Tiktoken> get_tiktoken_for_llama(
+std::unique_ptr<::tokenizers::Tiktoken> get_tiktoken_for_llama(
     Version version = Version::Default);
 
+std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens();
+
 } // namespace example
diff --git a/examples/models/llama/tokenizer/targets.bzl b/examples/models/llama/tokenizer/targets.bzl
index 40f8f29ac1e..704ebfeecb6 100644
--- a/examples/models/llama/tokenizer/targets.bzl
+++ b/examples/models/llama/tokenizer/targets.bzl
@@ -15,7 +15,8 @@ def define_common_targets():
             "llama_tiktoken.h",
         ],
         exported_deps = [
-            "//executorch/extension/llm/tokenizer:tiktoken",
+            "//pytorch/tokenizers:tiktoken",
+            "//executorch/extension/llm/tokenizer:tiktoken",  # TODO: remove
         ],
         visibility = [
             "@EXECUTORCH_CLIENTS",
diff --git a/examples/models/llama/tokenizer/test/test_tiktoken.cpp b/examples/models/llama/tokenizer/test/test_tiktoken.cpp
index 442da621748..cbb54f2cb3b 100644
--- a/examples/models/llama/tokenizer/test/test_tiktoken.cpp
+++ b/examples/models/llama/tokenizer/test/test_tiktoken.cpp
@@ -10,7 +10,7 @@
 
 #include <...>
 
-#include <executorch/runtime/platform/runtime.h>
+#include <pytorch/tokenizers/tiktoken.h>
 
 #include <...>
 
@@ -36,8 +36,8 @@ static std::string get_resource_path(const std::string& name) {
 class MultimodalTiktokenV5ExtensionTest : public Test {
  public:
   void SetUp() override {
-    executorch::runtime::runtime_init();
-    tokenizer_ = get_tiktoken_for_llama(Version::Multimodal);
+    tokenizer_ = std::make_unique<tokenizers::Tiktoken>(
+        example::get_multimodal_special_tokens(), 0, 1);
     modelPath_ = get_resource_path("test_tiktoken_tokenizer.model");
   }
 
diff --git a/examples/models/llava/runner/CMakeLists.txt b/examples/models/llava/runner/CMakeLists.txt
index 7bad4a827ae..de1de6f501c 100644
--- a/examples/models/llava/runner/CMakeLists.txt
+++ b/examples/models/llava/runner/CMakeLists.txt
@@ -29,7 +29,7 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 set(_llava_runner__srcs
     "${CMAKE_CURRENT_SOURCE_DIR}/llava_runner.cpp"
     "${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp"
-    "${EXECUTORCH_ROOT}/extension/llm/tokenizer/bpe_tokenizer.cpp"
+    "${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/llama2c_tokenizer.cpp"
 )
 
 # extension llm runner lib
@@ -47,5 +47,6 @@ set(llava_runner_deps executorch extension_data_loader extension_llm_runner
 target_link_libraries(llava_runner PUBLIC ${llava_runner_deps})
 
 target_include_directories(
-  llava_runner INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
+  llava_runner INTERFACE ${_common_include_directories}
+                         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp
index b3c0cce5c33..d368f8fb1a4 100644
--- a/examples/models/llava/runner/llava_runner.cpp
+++ b/examples/models/llava/runner/llava_runner.cpp
@@ -13,7 +13,7 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <...>
 #include <...>
 
@@ -43,7 +43,7 @@ Error LlavaRunner::load() {
   stats_.model_load_start_ms = llm::time_in_ms();
 
   // Load the tokenizer
-  tokenizer_ = std::make_unique<llm::BPETokenizer>();
+  tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
   tokenizer_->load(tokenizer_path_);
 
   // Load the text decoder runner
@@ -90,7 +90,7 @@ Result<uint64_t> LlavaRunner::prefill_prompt(
     int8_t bos,
     int8_t eos) {
   std::vector<uint64_t> prompt_tokens =
-      ET_UNWRAP(tokenizer_->encode(prompt, bos, eos));
+      ET_UNWRAP_TOKENIZER(tokenizer_->encode(prompt, bos, eos));
 
   return text_prefiller_->prefill(prompt_tokens, start_pos);
 }
diff --git a/examples/models/llava/runner/targets.bzl b/examples/models/llava/runner/targets.bzl
index 63fcc9d3b33..074c92b35e3 100644
--- a/examples/models/llava/runner/targets.bzl
+++ b/examples/models/llava/runner/targets.bzl
@@ -14,7 +14,6 @@ def define_common_targets():
         exported_deps = [
             "//executorch/backends/xnnpack:xnnpack_backend",
             "//executorch/extension/llm/runner:runner_lib",
-            "//executorch/extension/llm/tokenizer:bpe_tokenizer",
            "//executorch/extension/evalue_util:print_evalue",
             "//executorch/extension/module:module",
             "//executorch/extension/tensor:tensor",
@@ -23,5 +22,6 @@ def define_common_targets():
             "//executorch/runtime/core/exec_aten/util:tensor_util",
             "//executorch/configurations:optimized_native_cpu_ops",
             "//executorch/extension/llm/custom_ops:custom_ops",
+            "//pytorch/tokenizers:llama2c_tokenizer",
         ],
     )
diff --git a/examples/models/phi-3-mini/CMakeLists.txt b/examples/models/phi-3-mini/CMakeLists.txt
index e1ffd0da055..5e9cad0d3de 100644
--- a/examples/models/phi-3-mini/CMakeLists.txt
+++ b/examples/models/phi-3-mini/CMakeLists.txt
@@ -41,11 +41,12 @@ add_executable(
   phi_3_mini_runner
   main.cpp runner.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/sampler/sampler.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizer/bpe_tokenizer.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/src/llama2c_tokenizer.cpp
 )
 target_include_directories(
   phi_3_mini_runner
   PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
+         ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
   phi_3_mini_runner PRIVATE executorch extension_module_static extension_tensor
diff --git a/examples/models/phi-3-mini/runner.cpp b/examples/models/phi-3-mini/runner.cpp
index 1163a35d66b..15f76e9522c 100644
--- a/examples/models/phi-3-mini/runner.cpp
+++ b/examples/models/phi-3-mini/runner.cpp
@@ -11,15 +11,15 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 using executorch::aten::ScalarType;
 using executorch::extension::Module;
-using executorch::extension::llm::BPETokenizer;
 using executorch::extension::llm::Sampler;
 using executorch::runtime::Error;
+using tokenizers::Llama2cTokenizer;
 
 namespace example {
 
@@ -32,14 +32,14 @@ Runner::Runner(
     const std::string& tokenizer_path,
     const float temperature)
     : module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
-      tokenizer_(std::make_unique<BPETokenizer>()),
+      tokenizer_(std::make_unique<Llama2cTokenizer>()),
       sampler_(std::make_unique<Sampler>(
           VOCABULARY_SIZE,
           temperature,
           SAMPLER_TOP,
          static_cast<unsigned long long>(std::time(nullptr)))) {
   ET_CHECK_MSG(
-      tokenizer_->load(tokenizer_path) == Error::Ok,
+      tokenizer_->load(tokenizer_path) == tokenizers::Error::Ok,
       "Failed to load tokenizer at %s",
       tokenizer_path.c_str());
   ET_LOG(
@@ -52,7 +52,9 @@ Runner::Runner(
 void Runner::generate(const std::string& prompt, std::size_t max_seq_len) {
   auto encode_res = tokenizer_->encode(prompt, 0, 0);
   ET_CHECK_MSG(
-      encode_res.error() == Error::Ok, "Failed to encode %s", prompt.c_str());
+      encode_res.error() == tokenizers::Error::Ok,
+      "Failed to encode %s",
+      prompt.c_str());
   auto input_tokens = encode_res.get();
   auto prev_token = input_tokens.back();
   auto current_token = prefill(input_tokens);
diff --git a/examples/models/phi-3-mini/runner.h b/examples/models/phi-3-mini/runner.h
index 2048acdab27..2f0042a57ea 100644
--- a/examples/models/phi-3-mini/runner.h
+++ b/examples/models/phi-3-mini/runner.h
@@ -15,9 +15,9 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -43,7 +43,7 @@ class Runner {
   uint64_t run_model_step(uint64_t token);
 
   std::unique_ptr<executorch::extension::Module> module_;
-  std::unique_ptr<executorch::extension::llm::BPETokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Llama2cTokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
 };
 
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
index db7ac468b5e..dafc911a172 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
@@ -13,10 +13,11 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+
 #include <...>
 #include <...>
 #include <...>
@@ -191,19 +192,19 @@ Error Runner::load() {
 
   // llama3 tokenizer
   tokenizer_ = example::get_tiktoken_for_llama();
-  Error err = tokenizer_->load(tokenizer_path_);
-  if (err == Error::InvalidArgument) {
+  auto err = tokenizer_->load(tokenizer_path_);
+  if (err != tokenizers::Error::Ok) {
     ET_LOG(
         Info,
         "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
         tokenizer_path_.c_str());
     tokenizer_.reset();
     // llama2 tokenizer
-    tokenizer_ = std::make_unique<executorch::extension::llm::BPETokenizer>();
+    tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
     err = tokenizer_->load(tokenizer_path_);
     llama_version_ = LlamaVersion::kLlama2;
     ET_CHECK_MSG(
-        err == Error::Ok,
+        err == tokenizers::Error::Ok,
         "failed to load tokenizer %s",
         tokenizer_path_.c_str());
   } else {
@@ -335,9 +336,9 @@ Error Runner::generate(
   }
   seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_;
-  Result<std::vector<uint64_t>> encode_res =
+  tokenizers::Result<std::vector<uint64_t>> encode_res =
       tokenizer_->encode(prompt_, n_bos_, 0);
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(), "failed to encode prompt %s", prompt_.c_str());
   std::vector<uint64_t> prompt_tokens = encode_res.get();
 
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.h b/examples/qualcomm/oss_scripts/llama/runner/runner.h
index a12443d7c96..e693bcd7077 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.h
@@ -19,8 +19,8 @@
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
 #include <...>
+#include <pytorch/tokenizers/tiktoken.h>
 
 namespace example {
 
@@ -107,7 +107,7 @@ class Runner {
   float logits_scale_;
   int32_t logits_offset_;
   float temperature_;
-  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
   Stats stats_;
   std::unique_ptr<...> io_mgr_;
diff --git a/examples/qualcomm/oss_scripts/llama/targets.bzl b/examples/qualcomm/oss_scripts/llama/targets.bzl
index d1cca3a6ec0..c3f7e7fbbda 100644
--- a/examples/qualcomm/oss_scripts/llama/targets.bzl
+++ b/examples/qualcomm/oss_scripts/llama/targets.bzl
@@ -26,9 +26,9 @@ def define_common_targets():
             "//executorch/extension/module:module",
             "//executorch/extension/llm/sampler:sampler",
             "//executorch/examples/models/llama/tokenizer:tiktoken",
-            "//executorch/extension/llm/tokenizer:bpe_tokenizer",
             "//executorch/extension/evalue_util:print_evalue",
             "//executorch/backends/qualcomm/runtime:runtime",
+            "//pytorch/tokenizers:llama2c_tokenizer",
         ],
         external_deps = [
             "gflags",
diff --git a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
index def35cb3aa7..f96d0169809 100644
--- a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
+++ b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
@@ -27,7 +27,7 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs
 # build qaihub llama2 7b runner
 add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})
 target_include_directories(
-  qaihub_llama2_7b_runner PUBLIC ${_common_include_directories}
+  qaihub_llama2_7b_runner PUBLIC ${_common_include_directories}
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
   qaihub_llama2_7b_runner
@@ -58,7 +58,7 @@ list(APPEND _common_compile_options -DQAIHUB_LLAMA3_RUNNER)
 
 list(
   APPEND _qaihub_llama3_8b_runner__srcs
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
 )
 list(
   APPEND
@@ -69,7 +69,7 @@ list(
 # build qaihub llama3 8b runner
 add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
 target_include_directories(
-  qaihub_llama3_8b_runner PUBLIC ${_common_include_directories}
+  qaihub_llama3_8b_runner PUBLIC ${_common_include_directories}
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
index 4bddb32b53e..06ea324ef6f 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
@@ -12,7 +12,7 @@
 #if defined(QAIHUB_LLAMA3_RUNNER)
 #include <...>
 #else
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #endif
 #include <...>
 #include <...>
@@ -79,7 +79,7 @@ Runner::Runner(
   eos_id_.insert(tokenizer_->encode("<|eot_id|>", 0, 0).get()[0]);
   version_ = LlamaVersion::kLlama3;
 #else
-  tokenizer_ = std::make_unique<executorch::extension::llm::BPETokenizer>();
+  tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
   tokenizer_->load(tokenizer_path_);
   version_ = LlamaVersion::kLlama2;
 #endif
@@ -231,9 +231,9 @@ Error Runner::generate(
       break;
   }
 
-  Result<std::vector<uint64_t>> encode_res =
+  tokenizers::Result<std::vector<uint64_t>> encode_res =
       tokenizer_->encode(post_process_prompt, n_bos_, 0);
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(),
       "failed to encode prompt %s",
       post_process_prompt.c_str());
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.h b/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
index be9af7e2275..9672d6a3586 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
@@ -19,8 +19,8 @@
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -101,7 +101,7 @@ class Runner {
   std::vector<std::string> method_names_;
   std::string tokenizer_path_;
   float temperature_;
-  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
   Stats stats_;
   std::unique_ptr<...> io_mem_;
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 7adb980d224..6a0f8ad020f 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -51,5 +51,5 @@ target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
 target_include_directories(
   extension_llm_runner INTERFACE ${_common_include_directories}
-                                 ${EXECUTORCH_ROOT}
+                                 ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h
index 94539c65cc6..c17e039c11b 100644
--- a/extension/llm/runner/multimodal_runner.h
+++ b/extension/llm/runner/multimodal_runner.h
@@ -26,8 +26,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace executorch {
 namespace extension {
@@ -129,7 +129,7 @@ class ET_EXPERIMENTAL MultimodalRunner {
   std::unique_ptr<ImagePrefiller> image_prefiller_;
   std::unique_ptr<TextTokenGenerator> text_token_generator_;
   std::string tokenizer_path_;
-  std::unique_ptr<Tokenizer> tokenizer_;
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
 
   // stats
   Stats stats_;
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
index aa42c22b1b9..03b593cacf5 100644
--- a/extension/llm/runner/targets.bzl
+++ b/extension/llm/runner/targets.bzl
@@ -49,7 +49,7 @@ def define_common_targets():
             ],
             exported_deps = [
                 ":text_decoder_runner" + aten_suffix,
-                "//executorch/extension/llm/tokenizer:tokenizer_header",
+                "//pytorch/tokenizers:headers",
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/extension/tensor:tensor" + aten_suffix,
             ],
@@ -63,7 +63,7 @@ def define_common_targets():
             ],
             exported_deps = [
                 ":text_decoder_runner" + aten_suffix,
-                "//executorch/extension/llm/tokenizer:tokenizer_header",
+                "//pytorch/tokenizers:headers",
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/extension/tensor:tensor" + aten_suffix,
             ],
diff --git a/extension/llm/runner/text_prefiller.h b/extension/llm/runner/text_prefiller.h
index 2f1d5ae2b75..007f8188f56 100644
--- a/extension/llm/runner/text_prefiller.h
+++ b/extension/llm/runner/text_prefiller.h
@@ -12,8 +12,6 @@
 #pragma once
 
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
-#include <...>
 
 namespace executorch {
 namespace extension {
diff --git a/extension/llm/runner/text_token_generator.h b/extension/llm/runner/text_token_generator.h
index 62b924a57d8..e8bf891f8ec 100644
--- a/extension/llm/runner/text_token_generator.h
+++ b/extension/llm/runner/text_token_generator.h
@@ -11,8 +11,8 @@
 
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace executorch {
 namespace extension {
@@ -21,7 +21,7 @@ namespace llm {
 class ET_EXPERIMENTAL TextTokenGenerator {
  public:
   TextTokenGenerator(
-      Tokenizer* tokenizer,
+      ::tokenizers::Tokenizer* tokenizer,
       TextDecoderRunner* text_decoder_runner,
       bool use_kv_cache,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
@@ -106,7 +106,8 @@ class ET_EXPERIMENTAL TextTokenGenerator {
       }
 
       // print the token as string, decode it with the Tokenizer object
-      token_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
+      token_callback(
+          ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
 
       if (should_stop_) {
         break;
@@ -130,7 +131,7 @@ class ET_EXPERIMENTAL TextTokenGenerator {
   }
 
  private:
-  Tokenizer* tokenizer_;
+  ::tokenizers::Tokenizer* tokenizer_;
   TextDecoderRunner* text_decoder_runner_;
   std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
   bool use_kv_cache_;
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
index 04d4eccc4a7..d6ab23827f9 100644
--- a/extension/llm/runner/util.h
+++ b/extension/llm/runner/util.h
@@ -15,6 +15,30 @@
 #include <...>
 #endif
 
+#define ET_UNWRAP_TOKENIZER(result__)                                    \
+  ({                                                                     \
+    auto tk_result__ = (result__);                                       \
+    if (!tk_result__.ok()) {                                             \
+      ET_LOG(                                                            \
+          Error,                                                         \
+          "Tokenizers error code %d",                                    \
+          static_cast<uint32_t>(tk_result__.error()));                   \
+      return ::executorch::runtime::Error::InvalidArgument;              \
+    }                                                                    \
+    std::move(*tk_result__);                                             \
+  })
+
+#define ET_CHECK_TK_OK_OR_RETURN_ERROR(result__, ...)                    \
+  ({                                                                     \
+    auto tk_result__ = (result__);                                       \
+    if (tk_result__ != ::tokenizers::Error::Ok) {                        \
+      ET_LOG(                                                            \
+          Error, "Tokenizer error: %d", static_cast<uint32_t>(tk_result__)); \
+      ET_CHECK_OK_OR_RETURN_ERROR(                                       \
+          ::executorch::runtime::Error::InvalidArgument, ##__VA_ARGS__); \
+    }                                                                    \
+  })
+
 namespace executorch {
 namespace extension {
 namespace llm {
diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
index 4da2387af9c..ec61ab1489e 160000
--- a/extension/llm/tokenizers
+++ b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit 4da2387af9c9ab8730c1bcbcd2f49385a7ba6bbb
+Subproject commit ec61ab1489e2d0fb6ac82b39288ce505bf8bdeca
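The two macros added to util.h above are the bridge between tokenizers::Error / tokenizers::Result and executorch::runtime::Error; both rely on GCC/Clang statement expressions. A sketch of a call site; the function name, prompt, and path are illustrative:

// Sketch (not part of the patch): using the new macros from a function
// that returns executorch::runtime::Error.
#include <cinttypes>
#include <cstdio>
#include <string>
#include <vector>

#include <executorch/extension/llm/runner/util.h>
#include <executorch/runtime/core/error.h>
#include <pytorch/tokenizers/tokenizer.h>

::executorch::runtime::Error encode_prompt(
    ::tokenizers::Tokenizer* tokenizer, const std::string& prompt) {
  // Non-unwrapping variant: logs and returns Error::InvalidArgument from
  // this function if load() does not return tokenizers::Error::Ok.
  ET_CHECK_TK_OK_OR_RETURN_ERROR(
      tokenizer->load("tokenizer.model"), "failed to load tokenizer");

  // Unwrapping variant: yields the Result payload, or logs the tokenizers
  // error code and returns Error::InvalidArgument.
  std::vector<uint64_t> tokens =
      ET_UNWRAP_TOKENIZER(tokenizer->encode(prompt, /*bos=*/1, /*eos=*/0));

  for (uint64_t t : tokens) {
    printf("%" PRIu64 " ", t);
  }
  printf("\n");
  return ::executorch::runtime::Error::Ok;
}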
diff --git a/install_executorch.py b/install_executorch.py
index 0d82f0a05ca..1c5959cd8bb 100644
--- a/install_executorch.py
+++ b/install_executorch.py
@@ -14,6 +14,7 @@
 import shutil
 import subprocess
 import sys
+from contextlib import contextmanager
 
 from install_requirements import (
     install_requirements,
@@ -28,6 +29,17 @@
 logger = logging.getLogger()
 
 
+@contextmanager
+def pushd(new_dir):
+    """Change the current directory to new_dir and yield. When exiting the
+    context, change back to the original directory."""
+    original_dir = os.getcwd()
+    os.chdir(new_dir)
+    try:
+        yield
+    finally:
+        os.chdir(original_dir)
+
+
 def clean():
     print("Cleaning build artifacts...")
     print("Cleaning pip-out/...")
@@ -66,6 +78,7 @@ def clean():
         "pthreadpool": "CMakeLists.txt",
         "pybind11": "CMakeLists.txt",
         "shim": "BUCK",
+        "tokenizers": "CMakeLists.txt",
         "XNNPACK": "CMakeLists.txt",
     }
 
@@ -117,6 +130,11 @@ def check_folder(folder: str, file: str) -> bool:
             logger.error(f"{file} not found in {path}.")
             logger.error("Please run `git submodule update --init`.")
             exit(1)
+    # Go into tokenizers submodule and install its submodules
+    tokenizers_path = get_required_submodule_paths().get("tokenizers", None)
+    if tokenizers_path:
+        with pushd(tokenizers_path):
+            subprocess.check_call(["git", "submodule", "update", "--init"])
     logger.info("All required submodules are present.")
 
diff --git a/shim_et/xplat/executorch/build/env_interface.bzl b/shim_et/xplat/executorch/build/env_interface.bzl
index 43d3cb644e3..d34ad8c8042 100644
--- a/shim_et/xplat/executorch/build/env_interface.bzl
+++ b/shim_et/xplat/executorch/build/env_interface.bzl
@@ -10,11 +10,15 @@ load(":type_defs.bzl", "is_list", "is_tuple")
 
 _ET_TARGET_PREFIX = "executorch"
 
+_TOKENIZER_TARGET_PREFIX = "pytorch/tokenizers"
+
 # Indicates that an external_dep entry should fall through to the underlying
 # buck rule.
 _EXTERNAL_DEP_FALLTHROUGH = ""
 
 _EXTERNAL_DEPS = {
+    # Abseil for tokenizers
+    "abseil-cpp": "//extension/llm/tokenizers/third-party:abseil",
     # ATen C++ library deps
     "aten-core": [],  # TODO(larryliu0820): Add support
     # ATen native_functions.yaml file deps
@@ -46,7 +50,6 @@ _EXTERNAL_DEPS = {
     "re2": "//extension/llm/tokenizers/third-party:re2",
     "sentencepiece": [],  # Intentionally not supporting OSS buck build of sentencepiece.
     "sentencepiece-py": [],
-    "tiktoken": "//extension/llm/tokenizers:tiktoken",
     # Core C++ PyTorch functionality like Tensor and ScalarType.
     "torch-core-cpp": "//third-party:libtorch",
     "torchgen": "//third-party:torchgen",
@@ -66,10 +69,11 @@ def _resolve_external_dep(name):
         return [res]
 
 def _start_with_et_targets(target):
-    prefix = "//" + _ET_TARGET_PREFIX
-    for suffix in ("/", ":"):
-        if target.startswith(prefix + suffix):
-            return True
+    for prefix in [_ET_TARGET_PREFIX, _TOKENIZER_TARGET_PREFIX]:
+        prefix = "//" + prefix
+        for suffix in ("/", ":"):
+            if target.startswith(prefix + suffix):
+                return True
     return False
 
 def _patch_platforms(kwargs):
@@ -199,7 +203,11 @@ def _target_needs_patch(target):
     return _start_with_et_targets(target) or target.startswith(":")
 
 def _patch_target_for_env(target):
-    return target.replace("//executorch/", "//", 1)
+    if _ET_TARGET_PREFIX in target:
+        return target.replace("//executorch/", "//", 1)
+    elif _TOKENIZER_TARGET_PREFIX in target:
+        return target.replace("//pytorch/tokenizers", "//extension/llm/tokenizers", 1)
+    return target
 
 def _struct_to_json(object):
     # @lint-ignore BUCKLINT: native and fb_native are explicitly forbidden in fbcode.
@@ -245,6 +253,7 @@ env = struct(
     remove_unsupported_kwargs = _remove_unsupported_kwargs,
     resolve_external_dep = _resolve_external_dep,
     struct_to_json = _struct_to_json,
+    supported_platforms = [],
     target_needs_patch = _target_needs_patch,
     EXTERNAL_DEP_FALLTHROUGH = _EXTERNAL_DEP_FALLTHROUGH,
 )
diff --git a/shim_et/xplat/executorch/build/runtime_wrapper.bzl b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
index c0e8103f52f..ff5c6de6d96 100644
--- a/shim_et/xplat/executorch/build/runtime_wrapper.bzl
+++ b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
@@ -38,6 +38,9 @@ def struct_to_json(x):
 def get_default_executorch_platforms():
     return env.default_platforms
 
+def get_executorch_supported_platforms():
+    return env.supported_platforms
+
 def _patch_executorch_references(targets, use_static_deps = False):
     """Patches up references to "//executorch/..." in lists of build targets.
 
diff --git a/test/build_size_test.sh b/test/build_size_test.sh
index 09c0188ff9b..f7f9a0152d2 100644
--- a/test/build_size_test.sh
+++ b/test/build_size_test.sh
@@ -17,7 +17,7 @@ COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-contex
 cmake_install_executorch_lib() {
   echo "Installing libexecutorch.a"
   clean_executorch_install_folders
-
+  update_tokenizers_git_submodule
   CXXFLAGS="$COMMON_CXXFLAGS" retry cmake -DBUCK2="$BUCK2" \
       -DCMAKE_CXX_STANDARD_REQUIRED=ON \
       -DCMAKE_INSTALL_PREFIX=cmake-out \