Skip to content

NXP backend: Add quantization of aten.view #11784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

7 changes: 0 additions & 7 deletions backends/nxp/backend/ir/tflite_optimizer/optimizer.py
Original file line number Diff line number Diff line change
@@ -54,9 +54,6 @@
from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.remove_unused_tensors_and_buffers import (
RemoveUnusedTensorsAndBuffers,
)
from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.replace_average_pool_before_fully_connected_with_sum import (
ReplaceAveragePoolBeforeFullyConnectedWithSum,
)


class Optimization(Enum):
@@ -83,7 +80,6 @@ class Optimization(Enum):

MOVE_ACTIVATION_BEFORE_CONCAT = 15
COMBINE_HARD_SIGMOID_AND_MUL_INTO_HARD_SWISH = 16
REPLACE_AVERAGE_POOL_BEFORE_FULLY_CONNECTED_WITH_SUM = 17


class Optimizer:
@@ -164,9 +160,6 @@ def __init__(
Optimization.COMBINE_HARD_SIGMOID_AND_MUL_INTO_HARD_SWISH: CombineHardSigmoidAndMulIntoHardSwish(
builder, conversion_config
),
Optimization.REPLACE_AVERAGE_POOL_BEFORE_FULLY_CONNECTED_WITH_SUM: ReplaceAveragePoolBeforeFullyConnectedWithSum(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this related to the view support?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, not related to aten.view. Our fault.
It is indeed a separate commit, and the PR description should have included a proper description of it:
"Fix average pooling, which in case of kernel (1,1) turns into Sum in Neutron IR"

At least it has now been added to the PR description.

builder, conversion_config
),
}

def optimize(
2 changes: 2 additions & 0 deletions backends/nxp/quantizer/neutron_quantizer.py
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
ReluPattern,
ReshapePattern,
SoftMaxPattern,
ViewPattern,
)
from executorch.backends.nxp.quantizer.utils import (
find_sequential_partitions_aten,
@@ -200,6 +201,7 @@ def __init__(self):
NeutronAtenQuantizer(ReluPattern(), static_qconfig),
NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig),
NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig),
NeutronAtenQuantizer(ViewPattern(), static_qconfig),
]
)

9 changes: 9 additions & 0 deletions backends/nxp/quantizer/patterns.py
Original file line number Diff line number Diff line change
@@ -307,6 +307,15 @@ def partition_types(self):
return [torch.ops.aten.reshape.default]


class ViewPattern(SharedSpecPattern):
    """
    Quantization pattern for the ``aten.view`` operator.

    This subclass overrides nothing but ``partition_types``; everything else
    is inherited from ``SharedSpecPattern``.
    """

    def partition_types(self):
        """Return a one-element list holding ``torch.ops.aten.view.default``."""
        view_overload = torch.ops.aten.view.default
        return [view_overload]


class SoftMaxPattern(QuantizationPattern):
"""
Quantizer for Softmax operator.
Original file line number Diff line number Diff line change
@@ -7,13 +7,7 @@
import pytest
import torch

from executorch.backends.nxp.backend.edge_program_converter import (
EdgeProgramToIRConverter,
)
from executorch.backends.nxp.tests.executorch_pipeline import (
to_edge_program,
to_quantized_edge_program,
)
from executorch.backends.nxp.tests.executorch_pipeline import to_edge_program
from executorch.backends.nxp.tests.executors import (
convert_run_compare,
ToNCHWPreprocess,
@@ -22,9 +16,7 @@
from executorch.backends.nxp.tests.models import (
ConstantPadNDConvModule,
ConstantPadNDModule,
Conv2dConstantPadNDModule,
)
from torch.export import ExportedProgram


@pytest.fixture(autouse=True)
@@ -47,37 +39,6 @@ def test_constant_pad_nd_conversion__specific_constant(constant):
convert_run_compare(edge_program, input_data)


@pytest.mark.parametrize("constant", [0.0, 67.28, 42.0, -13.37])
@pytest.mark.skip(reason="Neutron Converter does not fully convert for NPU")
def test_constant_pad_nd_quant_conversion__specific_constant(mocker, constant):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated change?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, this should have been a separate PR to remove the irrelevant test. We will pay more attention when rebasing from our development tree.

input_shape = (2, 4, 12, 12)
paddings = (2, 2, 2, 2)

converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")

# Run conversion
_ = to_quantized_edge_program(
Conv2dConstantPadNDModule(paddings, constant), input_shape
)

# Capture generated model
tflite_flatbuffers_model, io_formats = converter_spy.spy_return

# Capture converted program
edge_program: ExportedProgram = converter_spy.call_args.args[1]

input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)

convert_run_compare(
edge_program,
input_data,
tfl_model=tflite_flatbuffers_model,
atol=1.0,
tflite_input_preprocess=ToNHWCPreprocess(),
tflite_output_preprocess=ToNCHWPreprocess(),
)


def test_constant_pad_nd_conversion__default_constant():
input_shape = [2, 4, 6, 8]
paddings = [1, 2, 3, 4]
Original file line number Diff line number Diff line change
@@ -89,6 +89,24 @@ def forward(self, x):
return x


class ConvLinearViewModule(torch.nn.Module):
    """
    Small test network: Conv2d -> ReLU -> AvgPool2d -> view -> Linear.

    The convolution keeps the channel count (``channels`` in, ``channels``
    out) and uses a 3x3 kernel with stride 2. The pooling layer uses a kernel
    size of 1. The pooled tensor is reshaped with
    ``view(-1, channels_view_out)`` and passed to a linear layer with
    ``channels_view_out`` input features and 32 output features.

    Args:
        channels: Number of input and output channels of the convolution.
        channels_view_out: Size of the last dimension after the ``view`` call;
            also the number of input features of the linear layer.
    """

    def __init__(self, channels: int, channels_view_out: int):
        super().__init__()
        self.channels_view_out = channels_view_out
        # Submodules are created in the order conv, linear, avg_pool, relu so
        # that registration order and seeded parameter initialisation stay
        # the same.
        self.conv = nn.Conv2d(
            in_channels=channels, out_channels=channels, kernel_size=3, stride=2
        )
        self.linear = nn.Linear(
            in_features=channels_view_out, out_features=32, bias=True
        )
        self.avg_pool = nn.AvgPool2d(kernel_size=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv, ReLU and pooling, reshape to 2D, then apply the linear layer."""
        pooled = self.avg_pool(self.relu(self.conv(x)))
        flattened = pooled.view(-1, self.channels_view_out)
        return self.linear(flattened)


def test__channels_first_to_2d(mocker):
input_shape = [2, 4, 7, 9]
new_shape = [12, 32] # Mix up the dimensions for a thorough test.
@@ -205,19 +223,20 @@ def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape):


@pytest.mark.parametrize(
"input_shape, new_shape",
"input_shape, channels_view_out",
[
pytest.param((1, 4, 16, 16), (50, 18), id="4D, batch_size=1"),
pytest.param((10, 4, 16, 16), (500, 18), id="4D, , batch_size=10"),
pytest.param((1, 4, 16, 16), 196, id="4D"),
],
)
@pytest.mark.skip(reason="Neutron Converter does not fully convert for NPU")
def test_view_copy_w_conv_quant_conversion(mocker, input_shape, new_shape):
def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_out):
converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")

# Run conversion
_ = to_quantized_edge_program(
ConvReshapeModule(channels=input_shape[1], new_shape=new_shape), input_shape
ConvLinearViewModule(
channels=input_shape[1], channels_view_out=channels_view_out
),
input_shape,
)

# Capture generated model