
Commit 0add080

Add tests for qwen + allow uninitialized weights in Llama model (#8552)
1 parent: 745be4e

File tree

7 files changed, +46 −12 lines changed

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     We put higher priority and fast models to pull request and rest to push.
     """
     if event == "pull_request":
-        return model in ["mv3", "vit"]
+        return model in ["mv3", "vit", "qwen2_5"]  # TODO: remove, just to test the ci
     elif event == "push":
         # These are super slow. Only run it periodically
         return model not in ["dl3", "edsr", "emformer_predict"]
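The gating above can be sanity-checked directly. A minimal sketch, assuming it is run from within .ci/scripts so the module is importable (that path layout is an assumption):

from gather_test_models import model_should_run_on_event

# Fast/high-priority models (now including qwen2_5) run on pull requests.
assert model_should_run_on_event("qwen2_5", "pull_request")
assert not model_should_run_on_event("dl3", "pull_request")

# Slow models are excluded from push and only run periodically.
assert not model_should_run_on_event("dl3", "push")
assert model_should_run_on_event("mv3", "push")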

.ci/scripts/test_model.sh

Lines changed: 11 additions & 1 deletion

@@ -91,7 +91,17 @@ test_model() {
     # Install requirements for llama vision.
     bash examples/models/llama3_2_vision/install_requirements.sh
   fi
-  # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
+  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+    # Install requirements for export_llama.
+    bash examples/models/llama/install_requirements.sh
+    # Test the export_llama script: python3 -m examples.models.llama.export_llama.
+    # Use a random Llama checkpoint with the Qwen 2.5 1.5b model configuration.
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
+    rm "./${MODEL_NAME}.pte"
+    return  # Skip running with the portable executor runner since portable doesn't support Qwen's biased linears.
+  fi
+
+  # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
   run_portable_executor_runner
 }
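The "biased linears" note refers to Qwen 2.5's attention projections carrying bias terms, unlike Llama's. A toy illustration of the structural difference (the dimensions here are illustrative, not taken from the config):

import torch.nn as nn

# Llama-style query projection: no bias term.
llama_wq = nn.Linear(2048, 2048, bias=False)

# Qwen 2.5-style query projection: bias=True, which the portable kernels
# currently can't run (hence the early return in the test script above).
qwen_wq = nn.Linear(1536, 1536, bias=True)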

examples/models/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -34,6 +34,7 @@
     "resnet50": ("resnet", "ResNet50Model"),
     "llava": ("llava", "LlavaModel"),
     "efficient_sam": ("efficient_sam", "EfficientSAM"),
+    "qwen2_5": ("qwen2_5", "Qwen2_5Model"),
 }

 __all__ = [
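Each registry value is a (module name, class name) pair. As a sketch of how such an entry can be resolved at load time (the helper below is illustrative, not code from this commit, and the package prefix is an assumption):

import importlib

def resolve_model_class(registry: dict, model_name: str):
    # Look up the (module, class) pair, e.g. ("qwen2_5", "Qwen2_5Model").
    module_name, class_name = registry[model_name]
    # Import examples.models.<module> and pull the model class off of it.
    module = importlib.import_module(f"executorch.examples.models.{module_name}")
    return getattr(module, class_name)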

examples/models/llama/export_llama_lib.py

Lines changed: 2 additions & 0 deletions

@@ -84,13 +84,15 @@
 verbosity_setting = None


+# All models that leverage the transformer architecture defined in llama_transformer.py.
 EXECUTORCH_DEFINED_MODELS = [
     "stories110m",
     "llama2",
     "llama3",
     "llama3_1",
     "llama3_2",
     "static_llama",
+    "qwen2_5",
 ]
 TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]

examples/models/llama/model.py

Lines changed: 17 additions & 8 deletions

@@ -236,14 +236,23 @@ def __init__(self, **kwargs):
             eviction_batch_size=eviction_batch_size,
         )

-        # assign=True: load params/buffers by assignment instead of performing an in-place copy.
-        # Because we are using device="meta", tensors do not have memory associated with them
-        # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
-        missing, unexpected = self.model_.load_state_dict(
-            checkpoint,
-            strict=False,
-            assign=True,
-        )  # self.model_ = Transformer(gptconf)
+        missing, unexpected = None, None
+        try:
+            # assign=True: load params/buffers by assignment instead of performing an in-place copy.
+            # Because we are using device="meta", tensors do not have memory associated with them
+            # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
+            missing, unexpected = self.model_.load_state_dict(
+                checkpoint,
+                strict=False,
+                assign=True,
+            )  # self.model_ = Transformer(gptconf)
+        except RuntimeError as e:
+            print(
+                "Could not load checkpoint into model, defaulting to random uninitialized weights."
+            )
+            print(f"Error: {e}")
+            # Need to provide concrete (empty) values for meta-initialized tensors for quantization.
+            self.model_.to_empty(device="cpu")

         if missing:
             missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")]
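The try/except above hinges on two PyTorch behaviors: load_state_dict raises RuntimeError on tensor shape mismatches even with strict=False, and meta tensors must be materialized (to_empty) before anything can write real values into them. A self-contained sketch of the same pattern on a toy module:

import torch
import torch.nn as nn

# Build the module on the meta device: parameters have shapes but no storage.
with torch.device("meta"):
    model = nn.Linear(8, 8)

# A checkpoint whose shapes don't match the model config (as with a random
# Llama checkpoint loaded into the Qwen 2.5 architecture).
checkpoint = {"weight": torch.randn(4, 4)}

try:
    # assign=True swaps the meta tensors for the checkpoint tensors; an
    # in-place copy onto meta tensors would be a silent no-op.
    model.load_state_dict(checkpoint, strict=False, assign=True)
except RuntimeError as e:
    print(f"Could not load checkpoint, using uninitialized weights: {e}")
    # Materialize the meta tensors as uninitialized CPU tensors so later
    # stages (e.g. quantization) have concrete values to operate on.
    model.to_empty(device="cpu")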

examples/models/qwen2_5/__init__.py

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.examples.models.llama.model import Llama2Model
+
+
+class Qwen2_5Model(Llama2Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+__all__ = [
+    "Qwen2_5Model",
+]
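Hypothetical usage of the new wrapper, mirroring how the other Llama-based example models are constructed (the keyword names and file paths below are assumptions, not taken from this commit):

from executorch.examples.models.qwen2_5 import Qwen2_5Model

model = Qwen2_5Model(
    checkpoint="qwen2_5-1_5b.pth",  # produced by convert_weights.py (assumed path)
    params="examples/models/qwen2_5/1_5b_config.json",
)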

examples/models/qwen2_5/convert_weights.py

Lines changed: 0 additions & 2 deletions

@@ -68,7 +68,6 @@ def main():

     # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
     checkpointer = FullModelHFCheckpointer(
-        # checkpoint_dir="/home/jackzhxng/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B/snapshots/8faed761d45a263340a0528343f099c05c9a4323/",
         checkpoint_dir=args.input_dir,
         checkpoint_files=["model.safetensors"],
         output_dir=".",
@@ -80,7 +79,6 @@ def main():

     print("Converting checkpoint...")
     sd = qwen_2_tune_to_meta(sd["model"])
-    # torch.save(sd, "/home/jackzhxng/models/qwen2_5-1_5b.pth")

     torch.save(sd, args.output)
     print(f"Checkpoint saved to {args.output}")
