Apply automatic Ruff fixes

github-actions[bot] · github-actions[bot] · commit d3ecc01cf30f · 2024-11-15T18:17:53.000Z
diff --git a/torchao/dtypes/__init__.py b/torchao/dtypes/__init__.py
@@ -2,13 +2,13 @@
     AffineQuantizedTensor,
     Float8AQTTensorImpl,
     Float8Layout,
+    Int4CPULayout,
     Layout,
     MarlinQQQLayout,
     MarlinSparseLayout,
     PlainLayout,
     SemiSparseLayout,
     TensorCoreTiledLayout,
-    Int4CPULayout,
     to_affine_quantized_floatx,
     to_affine_quantized_floatx_static,
     # experimental, will be merged into floatx in the future
diff --git a/torchao/dtypes/affine_quantized_tensor.py b/torchao/dtypes/affine_quantized_tensor.py
@@ -688,9 +688,11 @@ def extra_repr(self):
 
 @dataclass(frozen=True)
 class Int4CPULayout(Layout):
-    """ Only for PyTorch version at least 2.6 """
+    """Only for PyTorch version at least 2.6"""
+
     pass
 
+
 @dataclass(frozen=True)
 class Float8Layout(Layout):
     mm_config: Optional[Float8MMConfig] = None
@@ -1965,7 +1967,8 @@ def from_plain(
                 int_data.dtype == torch.int32
             ), "torch.ops.aten._convert_weight_to_int4pack_for_cpu expects `int32` dtype"
             packed_weight = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
-                int_data, 1 # TODO:remove
+                int_data,
+                1,  # TODO:remove
             )
         elif TORCH_VERSION_AT_LEAST_2_5:
             int_data = (int_data[::, ::2] << 4 | int_data[::, 1::2]).to(torch.uint8)
@@ -2124,6 +2127,7 @@ def get_plain(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     def get_layout(self) -> Layout:
         return self._layout
 
+
 #####################################################
 # torch functional and aten operator implementation #
 #####################################################
diff --git a/torchao/quantization/GPTQ.py b/torchao/quantization/GPTQ.py
@@ -17,6 +17,7 @@
 import torch.nn.functional as F
 from torch.utils._pytree import tree_flatten, tree_unflatten
 
+from torchao.dtypes.utils import is_device
 from torchao.utils import (
     TORCH_VERSION_AT_LEAST_2_3,
     TORCH_VERSION_AT_LEAST_2_6,
@@ -37,7 +38,6 @@
     pack_tinygemm_scales_and_zeros,
     per_token_dynamic_quant,
 )
-from torchao.dtypes.utils import is_device
 
 aten = torch.ops.aten
 
@@ -788,9 +788,14 @@ def _create_quantized_state_dict(
                     self.precision,  # dtype for scales_and_zeros
                 )
                 # TODO: just get the device from mod.weight.device?
-                if is_device(w_int4x8.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6:
-                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
-                        w_int4x8.to(self.device), self.inner_k_tiles
+                if (
+                    is_device(w_int4x8.device.type, "cpu")
+                    and TORCH_VERSION_AT_LEAST_2_6
+                ):
+                    weight_int4pack = (
+                        torch.ops.aten._convert_weight_to_int4pack_for_cpu(
+                            w_int4x8.to(self.device), self.inner_k_tiles
+                        )
                     )
                 else:
                     weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
diff --git a/torchao/quantization/qat/linear.py b/torchao/quantization/qat/linear.py
@@ -9,6 +9,7 @@
 import torch
 import torch.nn.functional as F
 
+from torchao.dtypes.utils import is_device
 from torchao.quantization.GPTQ import (
     Int8DynActInt4WeightLinear,
     WeightOnlyInt4Linear,
@@ -23,14 +24,13 @@
 )
 from torchao.quantization.unified import TwoStepQuantizer
 from torchao.quantization.utils import get_group_qparams_symmetric
+from torchao.utils import TORCH_VERSION_AT_LEAST_2_6
 
 from .api import FakeQuantizeConfig
 from .fake_quantizer import FakeQuantizer
 from .utils import (
     _get_qmin_qmax,
 )
-from torchao.dtypes.utils import is_device
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6
 
 
 class FakeQuantizedLinear(torch.nn.Linear):
@@ -375,7 +375,10 @@ def _convert_qat_linear_4w(self, module: torch.nn.Module):
                     n_bit,
                     config.group_size,
                 )
-                if is_device(q_weight.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6:
+                if (
+                    is_device(q_weight.device.type, "cpu")
+                    and TORCH_VERSION_AT_LEAST_2_6
+                ):
                     q_weight = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
                         q_weight.to(child.weight.device),
                         child.inner_k_tiles,
diff --git a/torchao/quantization/subclass.py b/torchao/quantization/subclass.py
@@ -8,16 +8,15 @@
 import torch
 from torch.utils._python_dispatch import return_and_correct_aliasing
 
+from torchao.dtypes.utils import is_device
 from torchao.quantization.utils import (
     dequantize_per_channel,
     dynamically_quantize_per_channel,
     groupwise_affine_quantize_tensor,
     quant_int8_dynamic_per_token_linear,
     unpack_tinygemm_scales_and_zeros,
 )
-from torchao.utils import find_multiple
-from torchao.dtypes.utils import is_device
-from torchao.utils import TORCH_VERSION_AT_LEAST_2_6
+from torchao.utils import TORCH_VERSION_AT_LEAST_2_6, find_multiple
 
 __all__ = [
     "Int8DynamicallyQuantizedLinearWeight",
@@ -620,7 +619,9 @@ def to_qtensor_components(cls, input_float, groupsize=128, inner_k_tiles=8):
             input_float, 4, groupsize, dtype=input_float.dtype
         )
         if is_device(input_float.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6:
-            int_data = aten._convert_weight_to_int4pack_for_cpu(input_int4x8, inner_k_tiles)
+            int_data = aten._convert_weight_to_int4pack_for_cpu(
+                input_int4x8, inner_k_tiles
+            )
         else:
             int_data = aten._convert_weight_to_int4pack(input_int4x8, inner_k_tiles)
         return int_data, scales_and_zeros, False, groupsize, inner_k_tiles
diff --git a/torchao/quantization/utils.py b/torchao/quantization/utils.py
@@ -9,6 +9,7 @@
 import torch
 from torch.utils._python_dispatch import TorchDispatchMode
 
+from torchao.dtypes.utils import is_device
 from torchao.kernel import (
     int_scaled_matmul,
 )
@@ -20,7 +21,6 @@
     quantize_affine,
 )
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, TORCH_VERSION_AT_LEAST_2_6
-from torchao.dtypes.utils import is_device
 
 __all__ = [
     "compute_error",
@@ -418,9 +418,11 @@ def groupwise_affine_dequantize_tensor_from_qparams(
     assert groupsize > 1
     assert w_int4x8.dim() == 2
     # need to handle single column case so check for dtype/size from groupwise_affine_quantize_tensor_from_qparams path
-    if TORCH_VERSION_AT_LEAST_2_5 and (
-        w_int4x8.dtype == torch.uint8 or w_int4x8.shape[-1] > 1
-    ) and not (is_device(w_int4x8.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6):
+    if (
+        TORCH_VERSION_AT_LEAST_2_5
+        and (w_int4x8.dtype == torch.uint8 or w_int4x8.shape[-1] > 1)
+        and not (is_device(w_int4x8.device.type, "cpu") and TORCH_VERSION_AT_LEAST_2_6)
+    ):
         data = w_int4x8.to(torch.int32)
         high_bits = data >> 4
         low_bits = data & 0x0F