|
8 | 8 | import torch
|
9 | 9 | from torch.utils._python_dispatch import return_and_correct_aliasing
|
10 | 10 |
|
| 11 | +from torchao.dtypes.utils import is_device |
11 | 12 | from torchao.quantization.utils import (
|
12 | 13 | dequantize_per_channel,
|
13 | 14 | dynamically_quantize_per_channel,
|
14 | 15 | groupwise_affine_quantize_tensor,
|
15 | 16 | quant_int8_dynamic_per_token_linear,
|
16 | 17 | unpack_tinygemm_scales_and_zeros,
|
17 | 18 | )
|
18 |
| -from torchao.utils import find_multiple |
19 |
| -from torchao.dtypes.utils import is_device |
20 |
| -from torchao.utils import TORCH_VERSION_AT_LEAST_2_6 |
| 19 | +from torchao.utils import TORCH_VERSION_AT_LEAST_2_6, find_multiple |
21 | 20 |
|
22 | 21 | __all__ = [
|
23 | 22 | "Int8DynamicallyQuantizedLinearWeight",
|
@@ -620,7 +619,9 @@ def to_qtensor_components(cls, input_float, groupsize=128, inner_k_tiles=8):
|
620 | 619 | input_float, 4, groupsize, dtype=input_float.dtype
|
621 | 620 | )
|
622 | 621 | if is_device(input_float.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6:
|
623 |
| - int_data = aten._convert_weight_to_int4pack_for_cpu(input_int4x8, inner_k_tiles) |
| 622 | + int_data = aten._convert_weight_to_int4pack_for_cpu( |
| 623 | + input_int4x8, inner_k_tiles |
| 624 | + ) |
624 | 625 | else:
|
625 | 626 | int_data = aten._convert_weight_to_int4pack(input_int4x8, inner_k_tiles)
|
626 | 627 | return int_data, scales_and_zeros, False, groupsize, inner_k_tiles
|
0 commit comments