
Commit 0add080

Add tests for qwen + allow uninitialized weights in Llama model (#8552)
1 parent: 745be4e

File tree

7 files changed, +46 −12 lines changed

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     We put higher priority and fast models to pull request and rest to push.
     """
     if event == "pull_request":
-        return model in ["mv3", "vit"]
+        return model in ["mv3", "vit", "qwen2_5"]  # TODO: remove, just to test the ci
     elif event == "push":
         # These are super slow. Only run it periodically
         return model not in ["dl3", "edsr", "emformer_predict"]
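The gating above can be sanity-checked directly. A minimal sketch, assuming it is run from within .ci/scripts so the module is importable (that path layout is an assumption):

from gather_test_models import model_should_run_on_event

# Fast/high-priority models (now including qwen2_5) run on pull requests.
assert model_should_run_on_event("qwen2_5", "pull_request")
assert not model_should_run_on_event("dl3", "pull_request")

# Slow models are excluded from push and only run periodically.
assert not model_should_run_on_event("dl3", "push")
assert model_should_run_on_event("mv3", "push")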

.ci/scripts/test_model.sh

Lines changed: 11 additions & 1 deletion

@@ -91,7 +91,17 @@ test_model() {
     # Install requirements for llama vision.
     bash examples/models/llama3_2_vision/install_requirements.sh
   fi
-  # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
+  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+    # Install requirements for export_llama.
+    bash examples/models/llama/install_requirements.sh
+    # Test the export_llama script: python3 -m examples.models.llama.export_llama.
+    # Use a random Llama checkpoint with the Qwen 2.5 1.5b model configuration.
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
+    rm "./${MODEL_NAME}.pte"
+    return  # Skip running with the portable executor runner since portable doesn't support Qwen's biased linears.
+  fi
+
+  # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
   run_portable_executor_runner
 }
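The "biased linears" note refers to Qwen 2.5's attention projections carrying bias terms, unlike Llama's. A toy illustration of the structural difference (the dimensions here are illustrative, not taken from the config):

import torch.nn as nn

# Llama-style query projection: no bias term.
llama_wq = nn.Linear(2048, 2048, bias=False)

# Qwen 2.5-style query projection: bias=True, which the portable kernels
# currently can't run (hence the early return in the test script above).
qwen_wq = nn.Linear(1536, 1536, bias=True)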

examples/models/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@
     "resnet50": ("resnet", "ResNet50Model"),
     "llava": ("llava", "LlavaModel"),
     "efficient_sam": ("efficient_sam", "EfficientSAM"),
+    "qwen2_5": ("qwen2_5", "Qwen2_5Model"),
 }

 __all__ = [
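Each registry value is a (module name, class name) pair. As a sketch of how such an entry can be resolved at load time (the helper below is illustrative, not code from this commit, and the package prefix is an assumption):

import importlib

def resolve_model_class(registry: dict, model_name: str):
    # Look up the (module, class) pair, e.g. ("qwen2_5", "Qwen2_5Model").
    module_name, class_name = registry[model_name]
    # Import examples.models.<module> and pull the model class off of it.
    module = importlib.import_module(f"executorch.examples.models.{module_name}")
    return getattr(module, class_name)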

examples/models/llama/export_llama_lib.py

Lines changed: 2 additions & 0 deletions

@@ -84,13 +84,15 @@
 verbosity_setting = None


+# All models that leverage the transformer architecture defined in llama_transformer.py.
 EXECUTORCH_DEFINED_MODELS = [
     "stories110m",
     "llama2",
     "llama3",
     "llama3_1",
     "llama3_2",
     "static_llama",
+    "qwen2_5",
 ]
 TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]

examples/models/llama/model.py

Lines changed: 17 additions & 8 deletions

@@ -236,14 +236,23 @@ def __init__(self, **kwargs):
             eviction_batch_size=eviction_batch_size,
         )

-        # assign=True: load params/buffers by assignment instead of performing an in-place copy.
-        # Because we are using device="meta", tensors do not have memory associated with them
-        # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
-        missing, unexpected = self.model_.load_state_dict(
-            checkpoint,
-            strict=False,
-            assign=True,
-        )  # self.model_ = Transformer(gptconf)
+        missing, unexpected = None, None
+        try:
+            # assign=True: load params/buffers by assignment instead of performing an in-place copy.
+            # Because we are using device="meta", tensors do not have memory associated with them
+            # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
+            missing, unexpected = self.model_.load_state_dict(
+                checkpoint,
+                strict=False,
+                assign=True,
+            )  # self.model_ = Transformer(gptconf)
+        except RuntimeError as e:
+            print(
+                "Could not load checkpoint into model, defaulting to random uninitialized weights."
+            )
+            print(f"Error: {e}")
+            # Need to provide concrete (empty) values for meta-initialized tensors for quantization.
+            self.model_.to_empty(device="cpu")

         if missing:
             missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")]
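The try/except above hinges on two PyTorch behaviors: load_state_dict raises RuntimeError on tensor shape mismatches even with strict=False, and meta tensors must be materialized (to_empty) before anything can write real values into them. A self-contained sketch of the same pattern on a toy module:

import torch
import torch.nn as nn

# Build the module on the meta device: parameters have shapes but no storage.
with torch.device("meta"):
    model = nn.Linear(8, 8)

# A checkpoint whose shapes don't match the model config (as with a random
# Llama checkpoint loaded into the Qwen 2.5 architecture).
checkpoint = {"weight": torch.randn(4, 4)}

try:
    # assign=True swaps the meta tensors for the checkpoint tensors; an
    # in-place copy onto meta tensors would be a silent no-op.
    model.load_state_dict(checkpoint, strict=False, assign=True)
except RuntimeError as e:
    print(f"Could not load checkpoint, using uninitialized weights: {e}")
    # Materialize the meta tensors as uninitialized CPU tensors so later
    # stages (e.g. quantization) have concrete values to operate on.
    model.to_empty(device="cpu")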

examples/models/qwen2_5/__init__.py

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.examples.models.llama.model import Llama2Model
+
+
+class Qwen2_5Model(Llama2Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+__all__ = [
+    "Qwen2_5Model",
+]
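Hypothetical usage of the new wrapper, mirroring how the other Llama-based example models are constructed (the keyword names and file paths below are assumptions, not taken from this commit):

from executorch.examples.models.qwen2_5 import Qwen2_5Model

model = Qwen2_5Model(
    checkpoint="qwen2_5-1_5b.pth",  # produced by convert_weights.py (assumed path)
    params="examples/models/qwen2_5/1_5b_config.json",
)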

examples/models/qwen2_5/convert_weights.py

Lines changed: 0 additions & 2 deletions

@@ -68,7 +68,6 @@ def main():

     # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
     checkpointer = FullModelHFCheckpointer(
-        # checkpoint_dir="/home/jackzhxng/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B/snapshots/8faed761d45a263340a0528343f099c05c9a4323/",
         checkpoint_dir=args.input_dir,
         checkpoint_files=["model.safetensors"],
         output_dir=".",
@@ -80,7 +79,6 @@ def main():

     print("Converting checkpoint...")
     sd = qwen_2_tune_to_meta(sd["model"])
-    # torch.save(sd, "/home/jackzhxng/models/qwen2_5-1_5b.pth")

     torch.save(sd, args.output)
     print(f"Checkpoint saved to {args.output}")
