From eaac0053bcf73bb08d47ebb25f1ba77483a0e9d3 Mon Sep 17 00:00:00 2001 From: Phillis Tang Date: Tue, 2 Jan 2024 12:55:36 -0800 Subject: [PATCH] fix and use the Coreml ANE optimized encoder --- coreml/whisper-encoder.mm | 4 ++-- models/convert-whisper-to-coreml.py | 15 +-------------- models/generate-coreml-model.sh | 2 +- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/coreml/whisper-encoder.mm b/coreml/whisper-encoder.mm index 8e93f180c1b..81a5a6aaa46 100644 --- a/coreml/whisper-encoder.mm +++ b/coreml/whisper-encoder.mm @@ -24,9 +24,9 @@ // select which device to run the Core ML model on MLModelConfiguration *config = [[MLModelConfiguration alloc] init]; - config.computeUnits = MLComputeUnitsCPUAndGPU; + // config.computeUnits = MLComputeUnitsCPUAndGPU; //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine; - //config.computeUnits = MLComputeUnitsAll; + config.computeUnits = MLComputeUnitsAll; const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]); diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py index fd7191abcb2..046aabd224e 100644 --- a/models/convert-whisper-to-coreml.py +++ b/models/convert-whisper-to-coreml.py @@ -143,20 +143,7 @@ def forward(self, x: Tensor): x = block(x) x = self.ln_post(x) - - # """ - # TODO: - # I think we need to transpose the result here to make it fit whisper.cpp memory order. - # However, even doing this, the results are still wrong. Kind of less wrong compared to - # not transposing, but still wrong. - - # Also, I don't know why the original OpenAI implementation does not need to transpose - - # transpose to (batch_size, n_ctx, n_state) - # x : torch.Tensor, shape = (batch_size, n_state, 1, n_ctx) - - # """ - # x = x.transpose(1,3) + x = x.squeeze(2).transpose(1, 2) return x diff --git a/models/generate-coreml-model.sh b/models/generate-coreml-model.sh index bd71b33876e..cb8be6dcbc0 100755 --- a/models/generate-coreml-model.sh +++ b/models/generate-coreml-model.sh @@ -23,7 +23,7 @@ if [[ $mname == "-h5" ]]; then echo $mpath python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True else - python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True + python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True fi xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/