Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@
/neural-compressor/neural_compressor/strategy
/neural-compressor/neural_compressor/training.py
/neural-compressor/neural_compressor/utils
/neural-compressor/neural_compressor/torch/algorithms/static_quant/
/neural-compressor/neural_compressor/torch/algorithms/smooth_quant/
2 changes: 2 additions & 0 deletions neural_compressor/torch/algorithms/smooth_quant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The SmoothQuant-related modules."""


from .utility import *
from .smooth_quant import SmoothQuantQuantizer
Expand Down
4 changes: 3 additions & 1 deletion neural_compressor/torch/algorithms/smooth_quant/save_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Save and load the quantized model."""


# pylint:disable=import-error
import torch
Expand All @@ -32,7 +34,7 @@ def recover_model_from_json(model, json_file_path, example_inputs): # pragma: n
example_inputs (tuple or torch.Tensor or dict): example inputs that will be passed to the ipex function.

Returns:
(object): quantized model
model (object): quantized model
"""
from torch.ao.quantization.observer import MinMaxObserver

Expand Down
62 changes: 45 additions & 17 deletions neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The quantizer using the SmoothQuant path."""


import json
import os
Expand Down Expand Up @@ -49,6 +51,8 @@


class SmoothQuantQuantizer(Quantizer):
"""SmoothQuantQuantizer Class."""

def __init__(self, quant_config: OrderedDict = {}): # pragma: no cover
"""Init a SmoothQuantQuantizer object.

Expand All @@ -61,9 +65,9 @@ def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Prepares a given model for quantization.

Args:
model: A float model to be quantized.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (torch.nn.Module): raw fp32 model or prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A prepared model.
Expand Down Expand Up @@ -128,9 +132,9 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
"""Converts a prepared model to a quantized model.

Args:
model: The prepared model to be converted.
example_inputs: Used to trace torch model.
inplace: Whether to carry out model transformations in-place. Defaults to True.
model (QuantizationInterceptionModule): the prepared model to be converted.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A quantized model.
Expand All @@ -153,14 +157,14 @@ def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
return model

def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
"""Execute the quantize process on the specified model.
"""Executes the quantize process on the specified model.

Args:
model: a float model to be quantized.
tune_cfg: quantization config for ops.
run_fn: a calibration function for calibrating the model.
example_inputs: used to trace torch model.
inplace: whether to carry out model transformations in-place.
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to True.

Returns:
A quantized model.
Expand Down Expand Up @@ -255,6 +259,22 @@ def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args,
def qdq_quantize(
model, tune_cfg, run_fn, example_inputs, inplace, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, sq
):
"""Executes the smooth quantize process.

Args:
model (torch.nn.Module): raw fp32 model or prepared model.
tune_cfg (OrderedDict): quantization config for ops.
run_fn (Callable): a calibration function for calibrating the model.
example_inputs (tensor/tuple/dict): used to trace torch model.
inplace (bool): whether to carry out model transformations in-place.
cfgs (dict): configs loaded from ipex config path.
op_infos_from_cfgs (dict): dict containing configs that have been parsed for each op.
output_tensor_id_op_name (dict): dict containing op names corresponding to 'op_infos_from_cfgs'.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.

Returns:
A quantized model.
"""
smoothquant_scale_info = sq.sq_scale_info
sq_minmax_init = True if tune_cfg.get("act_algo", "kl") == "minmax" else False

Expand Down Expand Up @@ -325,6 +345,14 @@ def qdq_quantize(


def _apply_pre_optimization(model, tune_cfg, sq, recover=False):
"""Retrieves sq info to absorb the scale into the layer at the output channel.

Args:
model (QuantizationInterceptionModule): a prepared model.
tune_cfg (OrderedDict): quantization config for ops.
sq (TorchSmoothQuant): TorchSmoothQuant class containing sq infos.
recover (bool, optional): whether to recover the scale. Defaults to False.
"""
sq_max_info = {}
if sq.record_max_info:
sq_max_info = sq.max_value_info
Expand Down Expand Up @@ -354,13 +382,13 @@ def _apply_pre_optimization(model, tune_cfg, sq, recover=False):


def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
"""Convert to a jit model.
"""Converts to a jit model.

Args:
model: a prepared model.
example_inputs: used to trace torch model.
use_bf16: whether to use bf16 for mixed precision.
inplace: whether to carry out model transformations in-place.
model (QuantizationInterceptionModule): a prepared model.
example_inputs (tensor/tuple/dict): used to trace torch model.
use_bf16 (bool): whether to use bf16 for mixed precision.
inplace (bool, optional): whether to carry out model transformations in-place. Defaults to False.

Returns:
A converted jit model.
Expand Down
Loading