Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .ci/scripts/test_ane_static_llama.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CI entry point: exports the stories110M llama model via the Core ML / ANE
# path (examples/apple/coreml/llama/export.py) to verify the export runs
# end to end. Requires utils.sh to provide download_stories_model_artifacts.

set -exu
set -o pipefail  # fail a pipeline if any stage fails, not just the last

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

# Fall back to python3 when the caller did not specify an interpreter.
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Portable "is it installed?" check; also echoes the resolved path for the log.
command -v "${PYTHON_EXECUTABLE}"

# Quote the path so a space in EXECUTORCH_ROOT cannot split the argument.
pushd "${EXECUTORCH_ROOT}/examples/apple/coreml/llama"

# Download stories110M llama artifacts (helper sourced from utils.sh)
download_stories_model_artifacts

# Use the configured interpreter: previously this hard-coded `python`, which
# could silently differ from the ${PYTHON_EXECUTABLE} validated above.
"${PYTHON_EXECUTABLE}" export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w

popd
22 changes: 22 additions & 0 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,28 @@ jobs:
# see if we can import the module successfully
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

# CI job: build ExecuTorch with Core ML pybindings on an Apple Silicon
# runner and exercise the ANE static-llama export script.
# NOTE(review): indentation here reflects the pasted diff, not valid YAML
# nesting — restore workflow indentation when applying to trunk.yml.
test-static-llama-ane:
name: test-static-llama-ane
# Reusable macOS job from pytorch/test-infra; runs `script` on the runner.
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
# For pull requests, test the PR head commit; otherwise the pushed commit.
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
set -eux
bash .ci/scripts/setup-conda.sh
eval "$(conda shell.bash hook)"

# Install requirements
sh install_requirements.sh
sh backends/apple/coreml/scripts/install_requirements.sh
python install_executorch.py --pybind coreml
sh examples/models/llama/install_requirements.sh

# Test ANE llama
sh .ci/scripts/test_ane_static_llama.sh

test-llama-runner-macos:
name: test-llama-runner-mac
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
Expand Down
1 change: 1 addition & 0 deletions examples/apple/coreml/llama/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def main() -> None:
torch.ops.aten.scaled_dot_product_attention.default,
# preserve norm op for numerical stability
torch.ops.aten.linalg_vector_norm.default,
torch.ops.aten.reciprocal.default,
],
compile_config=EdgeCompileConfig(
_check_ir_validity=False,
Expand Down
6 changes: 4 additions & 2 deletions examples/apple/coreml/llama/llama_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ def _norm(self, x):
# We have yet to do large scale evaluations on the numeric stability of this solution, but note that
# it appears better than what exists currently (removing FP32 casts and using FP16)
rms_norm_eps0 = (
x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)
x
* torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
* torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
)
return rms_norm_eps0

def forward(self, x):
Expand Down
Loading