From 9ce3d499dcb97b5ea910cb131679093e5c122491 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Sun, 16 Mar 2025 18:39:05 -0700
Subject: [PATCH] Migrate users of llm tokenizer to use pytorch-labs/tokenizers
 (#9114)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/9114

Finally migrate llm tokenizer usages to pytorch-labs/tokenizers.

Reviewed By: iseeyuan

Differential Revision: D70932091
---
 .ci/scripts/utils.sh                          |  8 ++++
 examples/mediatek/CMakeLists.txt              |  8 ++--
 .../mtk_llama_executor_runner.cpp             |  8 ++--
 .../executor_runner/mtk_llama_runner.h        |  6 +--
 examples/models/llama/CMakeLists.txt          |  6 ++-
 examples/models/llama/runner/CMakeLists.txt   |  9 +++--
 examples/models/llama/runner/runner.cpp       | 21 +++++++----
 examples/models/llama/runner/runner.h         |  4 +-
 examples/models/llama/runner/targets.bzl      |  2 +-
 .../models/llama/tokenizer/llama_tiktoken.cpp | 37 +++++++++----------
 .../models/llama/tokenizer/llama_tiktoken.h   |  6 ++-
 examples/models/llama/tokenizer/targets.bzl   |  3 +-
 .../llama/tokenizer/test/test_tiktoken.cpp    |  6 +--
 examples/models/llava/runner/CMakeLists.txt   |  5 ++-
 examples/models/llava/runner/llava_runner.cpp |  6 +--
 examples/models/llava/runner/targets.bzl      |  2 +-
 examples/models/phi-3-mini/CMakeLists.txt     |  3 +-
 examples/models/phi-3-mini/runner.cpp         | 12 +++---
 examples/models/phi-3-mini/runner.h           |  4 +-
 .../oss_scripts/llama/runner/runner.cpp       | 15 ++++----
 .../oss_scripts/llama/runner/runner.h         |  4 +-
 .../qualcomm/oss_scripts/llama/targets.bzl    |  2 +-
 .../qaihub_scripts/llama/CMakeLists.txt       |  6 +--
 .../qaihub_scripts/llama/runner/runner.cpp    |  8 ++--
 .../qaihub_scripts/llama/runner/runner.h      |  4 +-
 extension/llm/runner/CMakeLists.txt           |  2 +-
 extension/llm/runner/multimodal_runner.h      |  4 +-
 extension/llm/runner/targets.bzl              |  4 +-
 extension/llm/runner/text_prefiller.h         |  2 -
 extension/llm/runner/text_token_generator.h   |  9 +++--
 extension/llm/runner/util.h                   | 24 ++++++++++++
 extension/llm/tokenizers                      |  2 +-
 install_executorch.py                         | 18 +++++++++
 .../xplat/executorch/build/env_interface.bzl  | 21 ++++++++---
 .../executorch/build/runtime_wrapper.bzl      |  3 ++
 test/build_size_test.sh                       |  2 +-
 36 files changed, 183 insertions(+), 103 deletions(-)

diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
index 8d99a0022cf..c710196d896 100644
--- a/.ci/scripts/utils.sh
+++ b/.ci/scripts/utils.sh
@@ -20,6 +20,14 @@ clean_executorch_install_folders() {
   ./install_executorch.sh --clean
 }
 
+update_tokenizers_git_submodule() {
+  echo "Updating tokenizers git submodule..."
+  git submodule update --init
+  pushd extension/llm/tokenizers
+  git submodule update --init
+  popd
+}
+
 install_executorch() {
   which pip
   # Install executorch, this assumes that Executorch is checked out in the
diff --git a/examples/mediatek/CMakeLists.txt b/examples/mediatek/CMakeLists.txt
index d8e2a5bf667..111c741c6aa 100644
--- a/examples/mediatek/CMakeLists.txt
+++ b/examples/mediatek/CMakeLists.txt
@@ -137,18 +137,18 @@ if(${ANDROID})
   set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
 
   # Build tokenizers
-  set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizer)
+  set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizers)
   add_library(tokenizer STATIC)
   target_include_directories(
     tokenizer PUBLIC ${_common_include_directories} ${THIRD_PARTY_ABSL_DIR}
-                     ${THIRD_PARTY_RE2_DIR}
+                     ${THIRD_PARTY_RE2_DIR} ${LLAMA2_TOKENIZER_DIR}/include
   )
   target_link_libraries(tokenizer PRIVATE re2::re2)
   target_sources(
     tokenizer
-    PRIVATE ${LLAMA2_TOKENIZER_DIR}/tiktoken.cpp
-            ${LLAMA2_TOKENIZER_DIR}/bpe_tokenizer.cpp
+    PRIVATE ${LLAMA2_TOKENIZER_DIR}/src/tiktoken.cpp
+            ${LLAMA2_TOKENIZER_DIR}/src/llama2c_tokenizer.cpp
             ${CMAKE_CURRENT_SOURCE_DIR}/../models/llama/tokenizer/llama_tiktoken.cpp
   )
diff --git a/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp b/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
index 4fba0e20a81..73fc8a0fb89 100644
--- a/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
+++ b/examples/mediatek/executor_runner/mtk_llama_executor_runner.cpp
@@ -68,8 +68,8 @@
 #include "llama_runner/llm_helper/include/llm_types.h"
 
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/tokenizer.h>
 
 // Llama model options
 DEFINE_uint64(
@@ -140,10 +140,10 @@ using example::utils::read_file;
 using example::utils::split;
 using example::utils::Timer;
 using example::utils::to_string;
-using executorch::extension::llm::BPETokenizer;
-using executorch::extension::llm::Tokenizer;
 using executorch::runtime::Error;
 using executorch::runtime::Result;
+using tokenizers::Llama2cTokenizer;
+using tokenizers::Tokenizer;
 
 LlamaModelOptions get_model_options() {
   LlamaModelOptions options = {
diff --git a/examples/mediatek/executor_runner/mtk_llama_runner.h b/examples/mediatek/executor_runner/mtk_llama_runner.h
index 4c7b35d1a88..0f76f610a7e 100644
--- a/examples/mediatek/executor_runner/mtk_llama_runner.h
+++ b/examples/mediatek/executor_runner/mtk_llama_runner.h
@@ -14,8 +14,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/tiktoken.h>
 #include <...>
 #include <...>
 #include <...>
@@ -28,9 +28,9 @@ using Stats = ::executorch::llm::Stats;
 using example::LlamaModelOptions;
 using example::LlamaModelPaths;
 using example::LlamaRuntime;
-using executorch::extension::llm::Tokenizer;
 using executorch::runtime::Error;
 using executorch::runtime::Result;
+using tokenizers::Tokenizer;
 
 class MTKLlamaRunner : public executorch::extension::llm::IRunner {
  public:
diff --git a/examples/models/llama/CMakeLists.txt b/examples/models/llama/CMakeLists.txt
index 96ff28d8f49..50bb64a159b 100644
--- a/examples/models/llama/CMakeLists.txt
+++ b/examples/models/llama/CMakeLists.txt
@@ -209,7 +209,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
   endif()
 endif()
 
-target_include_directories(llama_main PUBLIC ${_common_include_directories})
+target_include_directories(
+  llama_main
+  PUBLIC ${_common_include_directories}
+         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+)
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
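Note on the API being adopted: every call site changed below follows the same pattern. The sketch here is assembled only from calls visible in this patch (load/encode/decode, tokenizers::Error, tokenizers::Result); the tokenizer artifact path and the printing loop are illustrative, not part of the change.

// Sketch (not part of the patch): the pytorch-labs/tokenizers call pattern.
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

#include <pytorch/tokenizers/llama2c_tokenizer.h>

int main() {
  std::unique_ptr<tokenizers::Tokenizer> tokenizer =
      std::make_unique<tokenizers::Llama2cTokenizer>();

  // load() returns tokenizers::Error rather than executorch::runtime::Error.
  if (tokenizer->load("tokenizer.bin") != tokenizers::Error::Ok) {
    fprintf(stderr, "failed to load tokenizer\n");
    return 1;
  }

  // encode() returns tokenizers::Result<std::vector<uint64_t>>.
  tokenizers::Result<std::vector<uint64_t>> encoded =
      tokenizer->encode("hello world", /*bos=*/1, /*eos=*/0);
  if (!encoded.ok()) {
    return 1;
  }

  // decode() turns a (prev_token, cur_token) pair back into a string piece.
  const std::vector<uint64_t>& tokens = encoded.get();
  for (size_t i = 1; i < tokens.size(); ++i) {
    auto piece = tokenizer->decode(tokens[i - 1], tokens[i]);
    if (piece.ok()) {
      printf("%s", piece.get().c_str());
    }
  }
  printf("\n");
  return 0;
}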
diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt
index 919bc356551..f423a5a20f1 100644
--- a/examples/models/llama/runner/CMakeLists.txt
+++ b/examples/models/llama/runner/CMakeLists.txt
@@ -43,7 +43,7 @@ target_include_directories(
 
 list(
   APPEND _llama_runner__srcs
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp
 )
 list(APPEND _llama_runner__srcs
      ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
@@ -83,7 +83,10 @@ target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
 
 target_include_directories(
   llama_runner
-  INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
-            ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  INTERFACE ${_common_include_directories}
+)
+target_include_directories(
+  llama_runner
+  PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
index f6820b8701e..e0a317aaff3 100644
--- a/examples/models/llama/runner/runner.cpp
+++ b/examples/models/llama/runner/runner.cpp
@@ -16,7 +16,7 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -78,17 +78,21 @@ Error Runner::load() {
   // load tokenizer. Assuming tiktoken is the default tokenizer
   tokenizer_ = nullptr;
   tokenizer_ = get_tiktoken_for_llama();
-  Error err = tokenizer_->load(tokenizer_path_);
+  ::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
   // Rely on tiktoken to throw error if the artifact is incompatible. Then we
   // fallback to BPE tokenizer.
-  if (err == Error::InvalidArgument) {
+  if (err != ::tokenizers::Error::Ok) {
     ET_LOG(
         Info,
         "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
         tokenizer_path_.c_str());
     tokenizer_.reset();
-    tokenizer_ = std::make_unique<llm::BPETokenizer>();
-    tokenizer_->load(tokenizer_path_);
+    tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
+    err = tokenizer_->load(tokenizer_path_);
+    ET_CHECK_TK_OK_OR_RETURN_ERROR(
+        err,
+        "Failed to load %s as a llama2.c tokenizer artifact",
+        tokenizer_path_.c_str());
   }
 
   ET_LOG(Info, "Reading metadata from model");
@@ -201,12 +205,12 @@ Error Runner::generate(
       ? seq_len
       : metadata_.at(kMaxSeqLen);
 
-  Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
+  ::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
       prompt,
       /* bos */ 0,
       /* eos */ 0);
 
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(), "Failed to encode prompt %s", prompt.c_str());
 
   // encode the (string) prompt into tokens sequence
@@ -242,7 +246,8 @@ Error Runner::generate(
   uint64_t cur_token = prefill_res.get();
 
   // print the first token from prefill. No prev_token so use cur_token for it.
-  wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
+  wrapped_callback(
+      ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
   RUNNER_ET_LOG(
       warmup,
       "RSS after prompt prefill: %f MiB (0 if unsupported)",
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
index 1acce2f8e92..509fe234027 100644
--- a/examples/models/llama/runner/runner.h
+++ b/examples/models/llama/runner/runner.h
@@ -23,8 +23,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace example {
 
@@ -58,7 +58,7 @@ class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
   // model
   std::unique_ptr<::executorch::extension::Module> module_;
   std::string tokenizer_path_;
-  std::unique_ptr<::executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
   std::unordered_map<std::string, int64_t> metadata_;
   std::unique_ptr<::executorch::extension::llm::TextDecoderRunner>
       text_decoder_runner_;
diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl
index 37827bb78a5..60fc1f2b74d 100644
--- a/examples/models/llama/runner/targets.bzl
+++ b/examples/models/llama/runner/targets.bzl
@@ -48,7 +48,7 @@ def define_common_targets():
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
                 "//executorch/examples/models/llama/tokenizer:tiktoken",
-                "//executorch/extension/llm/tokenizer:bpe_tokenizer",
+                "//pytorch/tokenizers:llama2c_tokenizer",
             ] + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
diff --git a/examples/models/llama/tokenizer/llama_tiktoken.cpp b/examples/models/llama/tokenizer/llama_tiktoken.cpp
index 74eacc1b5f0..f595de3c4e7 100644
--- a/examples/models/llama/tokenizer/llama_tiktoken.cpp
+++ b/examples/models/llama/tokenizer/llama_tiktoken.cpp
@@ -10,7 +10,7 @@
 
 namespace example {
 
-using ::executorch::extension::llm::Tiktoken;
+using ::tokenizers::Tiktoken;
 
 namespace {
 static constexpr int32_t kSpecialTokensSize = 256;
@@ -42,8 +42,23 @@ _get_default_special_tokens() {
   return special_tokens;
 }
 
-static inline std::unique_ptr<std::vector<std::string>>
-_get_multimodal_special_tokens() {
+std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
+  switch (version) {
+    case Version::Multimodal:
+      return get_multimodal_special_tokens();
+    default:
+      return _get_default_special_tokens();
+  }
+}
+
+} // namespace
+
+std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
+  return std::make_unique<Tiktoken>(
+      _get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
+}
+
+std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens() {
   auto special_tokens =
       std::make_unique<std::vector<std::string>>(std::vector<std::string>{
           "<|begin_of_text|>",
@@ -72,20 +87,4 @@ _get_multimodal_special_tokens() {
   return special_tokens;
 }
 
-std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
-  switch (version) {
-    case Version::Multimodal:
-      return _get_multimodal_special_tokens();
-    default:
-      return _get_default_special_tokens();
-  }
-}
-
-} // namespace
-
-std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
-  return std::make_unique<Tiktoken>(
-      _get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
-}
-
 } // namespace example
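The reshuffle above keeps get_tiktoken_for_llama() as the factory while promoting get_multimodal_special_tokens() to a public helper, so callers (such as the test updated below) can construct a Tiktoken directly. A usage sketch against those two signatures; the model path is illustrative:

// Sketch (not part of the patch): two ways to obtain a llama Tiktoken
// after this change. "tiktoken.model" is an illustrative path.
#include <memory>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <pytorch/tokenizers/tiktoken.h>

void load_llama_tokenizers() {
  // Factory: Version::Default selects the text-only special tokens,
  // Version::Multimodal the multimodal set.
  std::unique_ptr<::tokenizers::Tiktoken> via_factory =
      example::get_tiktoken_for_llama(example::Version::Multimodal);
  if (via_factory->load("tiktoken.model") != ::tokenizers::Error::Ok) {
    return;
  }

  // Direct construction from the now-public special-token list, mirroring
  // the updated test below (BOS token index 0, EOS token index 1).
  auto direct = std::make_unique<::tokenizers::Tiktoken>(
      example::get_multimodal_special_tokens(), 0, 1);
  if (direct->load("tiktoken.model") != ::tokenizers::Error::Ok) {
    return;
  }
}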
diff --git a/examples/models/llama/tokenizer/llama_tiktoken.h b/examples/models/llama/tokenizer/llama_tiktoken.h
index 6baa3f49cc6..a7f65eca29e 100644
--- a/examples/models/llama/tokenizer/llama_tiktoken.h
+++ b/examples/models/llama/tokenizer/llama_tiktoken.h
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
+#include <pytorch/tokenizers/tiktoken.h>
 
 namespace example {
 
@@ -17,7 +17,9 @@ enum class Version {
   Multimodal,
 };
 
-std::unique_ptr<::executorch::extension::llm::Tiktoken> get_tiktoken_for_llama(
+std::unique_ptr<::tokenizers::Tiktoken> get_tiktoken_for_llama(
     Version version = Version::Default);
 
+std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens();
+
 } // namespace example
diff --git a/examples/models/llama/tokenizer/targets.bzl b/examples/models/llama/tokenizer/targets.bzl
index 40f8f29ac1e..704ebfeecb6 100644
--- a/examples/models/llama/tokenizer/targets.bzl
+++ b/examples/models/llama/tokenizer/targets.bzl
@@ -15,7 +15,8 @@ def define_common_targets():
             "llama_tiktoken.h",
         ],
         exported_deps = [
-            "//executorch/extension/llm/tokenizer:tiktoken",
+            "//pytorch/tokenizers:tiktoken",
+            "//executorch/extension/llm/tokenizer:tiktoken",  # TODO: remove
         ],
         visibility = [
             "@EXECUTORCH_CLIENTS",
diff --git a/examples/models/llama/tokenizer/test/test_tiktoken.cpp b/examples/models/llama/tokenizer/test/test_tiktoken.cpp
index 442da621748..cbb54f2cb3b 100644
--- a/examples/models/llama/tokenizer/test/test_tiktoken.cpp
+++ b/examples/models/llama/tokenizer/test/test_tiktoken.cpp
@@ -10,7 +10,7 @@
 
 #include <...>
 
-#include <executorch/runtime/platform/runtime.h>
+#include <pytorch/tokenizers/tiktoken.h>
 
 #include <...>
 
@@ -36,8 +36,8 @@ static std::string get_resource_path(const std::string& name) {
 class MultimodalTiktokenV5ExtensionTest : public Test {
  public:
   void SetUp() override {
-    executorch::runtime::runtime_init();
-    tokenizer_ = get_tiktoken_for_llama(Version::Multimodal);
+    tokenizer_ = std::make_unique<tokenizers::Tiktoken>(
+        example::get_multimodal_special_tokens(), 0, 1);
     modelPath_ = get_resource_path("test_tiktoken_tokenizer.model");
   }
 
diff --git a/examples/models/llava/runner/CMakeLists.txt b/examples/models/llava/runner/CMakeLists.txt
index 7bad4a827ae..de1de6f501c 100644
--- a/examples/models/llava/runner/CMakeLists.txt
+++ b/examples/models/llava/runner/CMakeLists.txt
@@ -29,7 +29,7 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 set(_llava_runner__srcs
     "${CMAKE_CURRENT_SOURCE_DIR}/llava_runner.cpp"
     "${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp"
-    "${EXECUTORCH_ROOT}/extension/llm/tokenizer/bpe_tokenizer.cpp"
+    "${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/llama2c_tokenizer.cpp"
 )
 
 # extension llm runner lib
@@ -47,5 +47,6 @@ set(llava_runner_deps executorch extension_data_loader extension_llm_runner
 target_link_libraries(llava_runner PUBLIC ${llava_runner_deps})
 
 target_include_directories(
-  llava_runner INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
+  llava_runner INTERFACE ${_common_include_directories}
+                         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp
index b3c0cce5c33..d368f8fb1a4 100644
--- a/examples/models/llava/runner/llava_runner.cpp
+++ b/examples/models/llava/runner/llava_runner.cpp
@@ -13,7 +13,7 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <...>
 #include <...>
 
@@ -43,7 +43,7 @@ Error LlavaRunner::load() {
   stats_.model_load_start_ms = llm::time_in_ms();
 
   // Load the tokenizer
-  tokenizer_ = std::make_unique<llm::BPETokenizer>();
+  tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
   tokenizer_->load(tokenizer_path_);
 
   // Load the text decoder runner
@@ -90,7 +90,7 @@ Result<uint64_t> LlavaRunner::prefill_prompt(
     int8_t bos,
     int8_t eos) {
   std::vector<uint64_t> prompt_tokens =
-      ET_UNWRAP(tokenizer_->encode(prompt, bos, eos));
+      ET_UNWRAP_TOKENIZER(tokenizer_->encode(prompt, bos, eos));
 
   return text_prefiller_->prefill(prompt_tokens, start_pos);
 }
diff --git a/examples/models/llava/runner/targets.bzl b/examples/models/llava/runner/targets.bzl
index 63fcc9d3b33..074c92b35e3 100644
--- a/examples/models/llava/runner/targets.bzl
+++ b/examples/models/llava/runner/targets.bzl
@@ -14,7 +14,6 @@ def define_common_targets():
         exported_deps = [
             "//executorch/backends/xnnpack:xnnpack_backend",
             "//executorch/extension/llm/runner:runner_lib",
-            "//executorch/extension/llm/tokenizer:bpe_tokenizer",
            "//executorch/extension/evalue_util:print_evalue",
             "//executorch/extension/module:module",
             "//executorch/extension/tensor:tensor",
@@ -23,5 +22,6 @@ def define_common_targets():
             "//executorch/runtime/core/exec_aten/util:tensor_util",
             "//executorch/configurations:optimized_native_cpu_ops",
             "//executorch/extension/llm/custom_ops:custom_ops",
+            "//pytorch/tokenizers:llama2c_tokenizer",
         ],
     )
diff --git a/examples/models/phi-3-mini/CMakeLists.txt b/examples/models/phi-3-mini/CMakeLists.txt
index e1ffd0da055..5e9cad0d3de 100644
--- a/examples/models/phi-3-mini/CMakeLists.txt
+++ b/examples/models/phi-3-mini/CMakeLists.txt
@@ -41,11 +41,12 @@ add_executable(
   phi_3_mini_runner
   main.cpp runner.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/sampler/sampler.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizer/bpe_tokenizer.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/src/llama2c_tokenizer.cpp
 )
 target_include_directories(
   phi_3_mini_runner
   PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
+         ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
   phi_3_mini_runner PRIVATE executorch extension_module_static extension_tensor
diff --git a/examples/models/phi-3-mini/runner.cpp b/examples/models/phi-3-mini/runner.cpp
index 1163a35d66b..15f76e9522c 100644
--- a/examples/models/phi-3-mini/runner.cpp
+++ b/examples/models/phi-3-mini/runner.cpp
@@ -11,15 +11,15 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 using executorch::aten::ScalarType;
 using executorch::extension::Module;
-using executorch::extension::llm::BPETokenizer;
 using executorch::extension::llm::Sampler;
 using executorch::runtime::Error;
+using tokenizers::Llama2cTokenizer;
 
 namespace example {
 
@@ -32,14 +32,14 @@ Runner::Runner(
     const std::string& tokenizer_path,
     const float temperature)
     : module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
-      tokenizer_(std::make_unique<BPETokenizer>()),
+      tokenizer_(std::make_unique<Llama2cTokenizer>()),
       sampler_(std::make_unique<Sampler>(
           VOCABULARY_SIZE,
           temperature,
           SAMPLER_TOP,
          static_cast<unsigned long long>(std::time(nullptr)))) {
   ET_CHECK_MSG(
-      tokenizer_->load(tokenizer_path) == Error::Ok,
+      tokenizer_->load(tokenizer_path) == tokenizers::Error::Ok,
       "Failed to load tokenizer at %s",
       tokenizer_path.c_str());
   ET_LOG(
@@ -52,7 +52,9 @@ Runner::Runner(
 void Runner::generate(const std::string& prompt, std::size_t max_seq_len) {
   auto encode_res = tokenizer_->encode(prompt, 0, 0);
   ET_CHECK_MSG(
-      encode_res.error() == Error::Ok, "Failed to encode %s", prompt.c_str());
+      encode_res.error() == tokenizers::Error::Ok,
+      "Failed to encode %s",
+      prompt.c_str());
   auto input_tokens = encode_res.get();
   auto prev_token = input_tokens.back();
   auto current_token = prefill(input_tokens);
diff --git a/examples/models/phi-3-mini/runner.h b/examples/models/phi-3-mini/runner.h
index 2048acdab27..2f0042a57ea 100644
--- a/examples/models/phi-3-mini/runner.h
+++ b/examples/models/phi-3-mini/runner.h
@@ -15,9 +15,9 @@
 #include <...>
 #include <...>
 
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -43,7 +43,7 @@ class Runner {
   uint64_t run_model_step(uint64_t token);
 
   std::unique_ptr<executorch::extension::Module> module_;
-  std::unique_ptr<executorch::extension::llm::BPETokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Llama2cTokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
 };
 
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
index db7ac468b5e..dafc911a172 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
@@ -13,10 +13,11 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
 #include <...>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+
 #include <...>
 #include <...>
 #include <...>
@@ -191,19 +192,19 @@ Error Runner::load() {
 
   // llama3 tokenizer
   tokenizer_ = example::get_tiktoken_for_llama();
-  Error err = tokenizer_->load(tokenizer_path_);
-  if (err == Error::InvalidArgument) {
+  auto err = tokenizer_->load(tokenizer_path_);
+  if (err != tokenizers::Error::Ok) {
     ET_LOG(
         Info,
         "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
         tokenizer_path_.c_str());
     tokenizer_.reset();
     // llama2 tokenizer
-    tokenizer_ = std::make_unique<executorch::extension::llm::BPETokenizer>();
+    tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
     err = tokenizer_->load(tokenizer_path_);
     llama_version_ = LlamaVersion::kLlama2;
     ET_CHECK_MSG(
-        err == Error::Ok,
+        err == tokenizers::Error::Ok,
         "failed to load tokenizer %s",
         tokenizer_path_.c_str());
   } else {
@@ -335,9 +336,9 @@ Error Runner::generate(
   }
   seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_;
-  Result<std::vector<uint64_t>> encode_res =
+  tokenizers::Result<std::vector<uint64_t>> encode_res =
       tokenizer_->encode(prompt_, n_bos_, 0);
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(), "failed to encode prompt %s", prompt_.c_str());
   std::vector<uint64_t> prompt_tokens = encode_res.get();
 
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.h b/examples/qualcomm/oss_scripts/llama/runner/runner.h
index a12443d7c96..e693bcd7077 100644
--- a/examples/qualcomm/oss_scripts/llama/runner/runner.h
+++ b/examples/qualcomm/oss_scripts/llama/runner/runner.h
@@ -19,8 +19,8 @@
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tiktoken.h>
 #include <...>
+#include <pytorch/tokenizers/tiktoken.h>
 
 namespace example {
 
@@ -107,7 +107,7 @@ class Runner {
   float logits_scale_;
   int32_t logits_offset_;
   float temperature_;
-  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
   Stats stats_;
   std::unique_ptr<...> io_mgr_;
diff --git a/examples/qualcomm/oss_scripts/llama/targets.bzl b/examples/qualcomm/oss_scripts/llama/targets.bzl
index d1cca3a6ec0..c3f7e7fbbda 100644
--- a/examples/qualcomm/oss_scripts/llama/targets.bzl
+++ b/examples/qualcomm/oss_scripts/llama/targets.bzl
@@ -26,9 +26,9 @@ def define_common_targets():
             "//executorch/extension/module:module",
             "//executorch/extension/llm/sampler:sampler",
             "//executorch/examples/models/llama/tokenizer:tiktoken",
-            "//executorch/extension/llm/tokenizer:bpe_tokenizer",
             "//executorch/extension/evalue_util:print_evalue",
             "//executorch/backends/qualcomm/runtime:runtime",
+            "//pytorch/tokenizers:llama2c_tokenizer",
         ],
         external_deps = [
             "gflags",
diff --git a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
index def35cb3aa7..f96d0169809 100644
--- a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
+++ b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
@@ -27,7 +27,7 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs
 # build qaihub llama2 7b runner
 add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs})
 target_include_directories(
-  qaihub_llama2_7b_runner PUBLIC ${_common_include_directories}
+  qaihub_llama2_7b_runner PUBLIC ${_common_include_directories}
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
   qaihub_llama2_7b_runner
@@ -58,7 +58,7 @@ list(APPEND _common_compile_options -DQAIHUB_LLAMA3_RUNNER)
 
 list(
   APPEND _qaihub_llama3_8b_runner__srcs
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/src/tiktoken.cpp
 )
 list(
   APPEND
@@ -69,7 +69,7 @@ list(
 # build qaihub llama3 8b runner
 add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
 target_include_directories(
-  qaihub_llama3_8b_runner PUBLIC ${_common_include_directories}
+  qaihub_llama3_8b_runner PUBLIC ${_common_include_directories}
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
 )
 target_link_libraries(
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
index 4bddb32b53e..06ea324ef6f 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.cpp
@@ -12,7 +12,7 @@
 #if defined(QAIHUB_LLAMA3_RUNNER)
 #include <...>
 #else
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #endif
 #include <...>
 #include <...>
@@ -79,7 +79,7 @@ Runner::Runner(
   eos_id_.insert(tokenizer_->encode("<|eot_id|>", 0, 0).get()[0]);
   version_ = LlamaVersion::kLlama3;
 #else
-  tokenizer_ = std::make_unique<executorch::extension::llm::BPETokenizer>();
+  tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
   tokenizer_->load(tokenizer_path_);
   version_ = LlamaVersion::kLlama2;
 #endif
@@ -231,9 +231,9 @@ Error Runner::generate(
       break;
   }
 
-  Result<std::vector<uint64_t>> encode_res =
+  tokenizers::Result<std::vector<uint64_t>> encode_res =
       tokenizer_->encode(post_process_prompt, n_bos_, 0);
-  ET_CHECK_OK_OR_RETURN_ERROR(
+  ET_CHECK_TK_OK_OR_RETURN_ERROR(
       encode_res.error(),
       "failed to encode prompt %s",
       post_process_prompt.c_str());
diff --git a/examples/qualcomm/qaihub_scripts/llama/runner/runner.h b/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
index be9af7e2275..9672d6a3586 100644
--- a/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
+++ b/examples/qualcomm/qaihub_scripts/llama/runner/runner.h
@@ -19,8 +19,8 @@
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 namespace example {
 
@@ -101,7 +101,7 @@ class Runner {
   std::vector<std::string> method_names_;
   std::string tokenizer_path_;
   float temperature_;
-  std::unique_ptr<executorch::extension::llm::Tokenizer> tokenizer_;
+  std::unique_ptr<tokenizers::Tokenizer> tokenizer_;
   std::unique_ptr<executorch::extension::llm::Sampler> sampler_;
   Stats stats_;
   std::unique_ptr<...> io_mem_;
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 7adb980d224..6a0f8ad020f 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -51,5 +51,5 @@ target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
 target_include_directories(
   extension_llm_runner INTERFACE ${_common_include_directories}
-                                 ${EXECUTORCH_ROOT}
+                                 ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h
index 94539c65cc6..c17e039c11b 100644
--- a/extension/llm/runner/multimodal_runner.h
+++ b/extension/llm/runner/multimodal_runner.h
@@ -26,8 +26,8 @@
 #include <...>
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace executorch {
 namespace extension {
@@ -129,7 +129,7 @@ class ET_EXPERIMENTAL MultimodalRunner {
   std::unique_ptr<ImagePrefiller> image_prefiller_;
   std::unique_ptr<TextTokenGenerator> text_token_generator_;
   std::string tokenizer_path_;
-  std::unique_ptr<Tokenizer> tokenizer_;
+  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
 
   // stats
   Stats stats_;
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
index aa42c22b1b9..03b593cacf5 100644
--- a/extension/llm/runner/targets.bzl
+++ b/extension/llm/runner/targets.bzl
@@ -49,7 +49,7 @@ def define_common_targets():
             ],
             exported_deps = [
                 ":text_decoder_runner" + aten_suffix,
-                "//executorch/extension/llm/tokenizer:tokenizer_header",
+                "//pytorch/tokenizers:headers",
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/extension/tensor:tensor" + aten_suffix,
             ],
@@ -63,7 +63,7 @@ def define_common_targets():
             ],
             exported_deps = [
                 ":text_decoder_runner" + aten_suffix,
-                "//executorch/extension/llm/tokenizer:tokenizer_header",
+                "//pytorch/tokenizers:headers",
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/extension/tensor:tensor" + aten_suffix,
             ],
diff --git a/extension/llm/runner/text_prefiller.h b/extension/llm/runner/text_prefiller.h
index 2f1d5ae2b75..007f8188f56 100644
--- a/extension/llm/runner/text_prefiller.h
+++ b/extension/llm/runner/text_prefiller.h
@@ -12,8 +12,6 @@
 #pragma once
 
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
-#include <...>
 
 namespace executorch {
 namespace extension {
diff --git a/extension/llm/runner/text_token_generator.h b/extension/llm/runner/text_token_generator.h
index 62b924a57d8..e8bf891f8ec 100644
--- a/extension/llm/runner/text_token_generator.h
+++ b/extension/llm/runner/text_token_generator.h
@@ -11,8 +11,8 @@
 
 #include <...>
 #include <...>
-#include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <...>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace executorch {
 namespace extension {
@@ -21,7 +21,7 @@ namespace llm {
 class ET_EXPERIMENTAL TextTokenGenerator {
  public:
   TextTokenGenerator(
-      Tokenizer* tokenizer,
+      ::tokenizers::Tokenizer* tokenizer,
       TextDecoderRunner* text_decoder_runner,
       bool use_kv_cache,
       std::unique_ptr<std::unordered_set<uint64_t>>&& eos_ids,
@@ -106,7 +106,8 @@ class ET_EXPERIMENTAL TextTokenGenerator {
       }
 
       // print the token as string, decode it with the Tokenizer object
-      token_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
+      token_callback(
+          ET_UNWRAP_TOKENIZER(tokenizer_->decode(prev_token, cur_token)));
 
       if (should_stop_) {
         break;
@@ -130,7 +131,7 @@ class ET_EXPERIMENTAL TextTokenGenerator {
   }
 
  private:
-  Tokenizer* tokenizer_;
+  ::tokenizers::Tokenizer* tokenizer_;
   TextDecoderRunner* text_decoder_runner_;
   std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
   bool use_kv_cache_;
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
index 04d4eccc4a7..d6ab23827f9 100644
--- a/extension/llm/runner/util.h
+++ b/extension/llm/runner/util.h
@@ -15,6 +15,30 @@
 #include <...>
 #endif
 
+#define ET_UNWRAP_TOKENIZER(result__)                                    \
+  ({                                                                     \
+    auto tk_result__ = (result__);                                       \
+    if (!tk_result__.ok()) {                                             \
+      ET_LOG(                                                            \
+          Error,                                                         \
+          "Tokenizers error code %d",                                    \
+          static_cast<uint32_t>(tk_result__.error()));                   \
+      return ::executorch::runtime::Error::InvalidArgument;              \
+    }                                                                    \
+    std::move(*tk_result__);                                             \
+  })
+
+#define ET_CHECK_TK_OK_OR_RETURN_ERROR(result__, ...)                    \
+  ({                                                                     \
+    auto tk_result__ = (result__);                                       \
+    if (tk_result__ != ::tokenizers::Error::Ok) {                        \
+      ET_LOG(                                                            \
+          Error, "Tokenizer error: %d", static_cast<uint32_t>(tk_result__)); \
+      ET_CHECK_OK_OR_RETURN_ERROR(                                       \
+          ::executorch::runtime::Error::InvalidArgument, ##__VA_ARGS__); \
+    }                                                                    \
+  })
+
 namespace executorch {
 namespace extension {
 namespace llm {
diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
index 4da2387af9c..ec61ab1489e 160000
--- a/extension/llm/tokenizers
+++ b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit 4da2387af9c9ab8730c1bcbcd2f49385a7ba6bbb
+Subproject commit ec61ab1489e2d0fb6ac82b39288ce505bf8bdeca
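The two macros added to util.h above are the bridge between tokenizers::Error / tokenizers::Result and executorch::runtime::Error; both rely on GCC/Clang statement expressions. A sketch of a call site; the function name, prompt, and path are illustrative:

// Sketch (not part of the patch): using the new macros from a function
// that returns executorch::runtime::Error.
#include <cinttypes>
#include <cstdio>
#include <string>
#include <vector>

#include <executorch/extension/llm/runner/util.h>
#include <executorch/runtime/core/error.h>
#include <pytorch/tokenizers/tokenizer.h>

::executorch::runtime::Error encode_prompt(
    ::tokenizers::Tokenizer* tokenizer, const std::string& prompt) {
  // Non-unwrapping variant: logs and returns Error::InvalidArgument from
  // this function if load() does not return tokenizers::Error::Ok.
  ET_CHECK_TK_OK_OR_RETURN_ERROR(
      tokenizer->load("tokenizer.model"), "failed to load tokenizer");

  // Unwrapping variant: yields the Result payload, or logs the tokenizers
  // error code and returns Error::InvalidArgument.
  std::vector<uint64_t> tokens =
      ET_UNWRAP_TOKENIZER(tokenizer->encode(prompt, /*bos=*/1, /*eos=*/0));

  for (uint64_t t : tokens) {
    printf("%" PRIu64 " ", t);
  }
  printf("\n");
  return ::executorch::runtime::Error::Ok;
}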
diff --git a/install_executorch.py b/install_executorch.py
index 0d82f0a05ca..1c5959cd8bb 100644
--- a/install_executorch.py
+++ b/install_executorch.py
@@ -14,6 +14,7 @@
 import shutil
 import subprocess
 import sys
+from contextlib import contextmanager
 
 from install_requirements import (
     install_requirements,
@@ -28,6 +29,17 @@
 logger = logging.getLogger()
 
 
+@contextmanager
+def pushd(new_dir):
+    """Change the current directory to new_dir and yield. When exiting the
+    context, change back to the original directory."""
+    original_dir = os.getcwd()
+    os.chdir(new_dir)
+    try:
+        yield
+    finally:
+        os.chdir(original_dir)
+
+
 def clean():
     print("Cleaning build artifacts...")
     print("Cleaning pip-out/...")
@@ -66,6 +78,7 @@ def clean():
         "pthreadpool": "CMakeLists.txt",
         "pybind11": "CMakeLists.txt",
         "shim": "BUCK",
+        "tokenizers": "CMakeLists.txt",
         "XNNPACK": "CMakeLists.txt",
     }
 
@@ -117,6 +130,11 @@ def check_folder(folder: str, file: str) -> bool:
             logger.error(f"{file} not found in {path}.")
             logger.error("Please run `git submodule update --init`.")
             exit(1)
+    # Go into tokenizers submodule and install its submodules
+    tokenizers_path = get_required_submodule_paths().get("tokenizers", None)
+    if tokenizers_path:
+        with pushd(tokenizers_path):
+            subprocess.check_call(["git", "submodule", "update", "--init"])
     logger.info("All required submodules are present.")
 
diff --git a/shim_et/xplat/executorch/build/env_interface.bzl b/shim_et/xplat/executorch/build/env_interface.bzl
index 43d3cb644e3..d34ad8c8042 100644
--- a/shim_et/xplat/executorch/build/env_interface.bzl
+++ b/shim_et/xplat/executorch/build/env_interface.bzl
@@ -10,11 +10,15 @@ load(":type_defs.bzl", "is_list", "is_tuple")
 
 _ET_TARGET_PREFIX = "executorch"
 
+_TOKENIZER_TARGET_PREFIX = "pytorch/tokenizers"
+
 # Indicates that an external_dep entry should fall through to the underlying
 # buck rule.
 _EXTERNAL_DEP_FALLTHROUGH = ""
 
 _EXTERNAL_DEPS = {
+    # Abseil for tokenizers
+    "abseil-cpp": "//extension/llm/tokenizers/third-party:abseil",
     # ATen C++ library deps
     "aten-core": [],  # TODO(larryliu0820): Add support
     # ATen native_functions.yaml file deps
@@ -46,7 +50,6 @@ _EXTERNAL_DEPS = {
     "re2": "//extension/llm/tokenizers/third-party:re2",
     "sentencepiece": [],  # Intentionally not supporting OSS buck build of sentencepiece.
     "sentencepiece-py": [],
-    "tiktoken": "//extension/llm/tokenizers:tiktoken",
     # Core C++ PyTorch functionality like Tensor and ScalarType.
     "torch-core-cpp": "//third-party:libtorch",
     "torchgen": "//third-party:torchgen",
@@ -66,10 +69,11 @@ def _resolve_external_dep(name):
         return [res]
 
 def _start_with_et_targets(target):
-    prefix = "//" + _ET_TARGET_PREFIX
-    for suffix in ("/", ":"):
-        if target.startswith(prefix + suffix):
-            return True
+    for prefix in [_ET_TARGET_PREFIX, _TOKENIZER_TARGET_PREFIX]:
+        prefix = "//" + prefix
+        for suffix in ("/", ":"):
+            if target.startswith(prefix + suffix):
+                return True
     return False
 
 def _patch_platforms(kwargs):
@@ -199,7 +203,11 @@ def _target_needs_patch(target):
     return _start_with_et_targets(target) or target.startswith(":")
 
 def _patch_target_for_env(target):
-    return target.replace("//executorch/", "//", 1)
+    if _ET_TARGET_PREFIX in target:
+        return target.replace("//executorch/", "//", 1)
+    elif _TOKENIZER_TARGET_PREFIX in target:
+        return target.replace("//pytorch/tokenizers", "//extension/llm/tokenizers", 1)
+    return target
 
 def _struct_to_json(object):
     # @lint-ignore BUCKLINT: native and fb_native are explicitly forbidden in fbcode.
@@ -245,6 +253,7 @@ env = struct(
     remove_unsupported_kwargs = _remove_unsupported_kwargs,
     resolve_external_dep = _resolve_external_dep,
     struct_to_json = _struct_to_json,
+    supported_platforms = [],
     target_needs_patch = _target_needs_patch,
     EXTERNAL_DEP_FALLTHROUGH = _EXTERNAL_DEP_FALLTHROUGH,
 )
diff --git a/shim_et/xplat/executorch/build/runtime_wrapper.bzl b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
index c0e8103f52f..ff5c6de6d96 100644
--- a/shim_et/xplat/executorch/build/runtime_wrapper.bzl
+++ b/shim_et/xplat/executorch/build/runtime_wrapper.bzl
@@ -38,6 +38,9 @@ def struct_to_json(x):
 def get_default_executorch_platforms():
     return env.default_platforms
 
+def get_executorch_supported_platforms():
+    return env.supported_platforms
+
 def _patch_executorch_references(targets, use_static_deps = False):
     """Patches up references to "//executorch/..." in lists of build targets.
 
diff --git a/test/build_size_test.sh b/test/build_size_test.sh
index 09c0188ff9b..f7f9a0152d2 100644
--- a/test/build_size_test.sh
+++ b/test/build_size_test.sh
@@ -17,7 +17,7 @@ COMMON_CXXFLAGS="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-contex
 cmake_install_executorch_lib() {
   echo "Installing libexecutorch.a"
   clean_executorch_install_folders
-
+  update_tokenizers_git_submodule
   CXXFLAGS="$COMMON_CXXFLAGS" retry cmake -DBUCK2="$BUCK2" \
       -DCMAKE_CXX_STANDARD_REQUIRED=ON \
       -DCMAKE_INSTALL_PREFIX=cmake-out \