Skip to content

Commit 2a54b8b

Browse files
dzdang authored and facebook-github-bot committed
Towards supporting quantized structured kernels (pytorch#74560)
Summary: Pull Request resolved: pytorch#74560 This PR adds support for quantized tensors with "unknown quantizer", which means that we can use standard APIs like torch.empty to allocate quantized tensors, with the understanding that we will set the quantizer later. This makes meta functions applicable to quantized tensors (they will allocate with unknown quantizer and the kernel will set the quantizer later) and fixes a bug David Dang reported where structured kernels give a weird error message when you call them with quantized inputs. This is not a complete support for quantized structured kernels because I haven't actually tried porting any of the quantized implementations to structured; qadd is probably a good choice to try first as it does its broadcasting implementation using TensorIterator. My goal here is just to show that the error message is better. See also pytorch#52680 Signed-off-by: Edward Z. Yang <ezyang@fb.com> Test Plan: Imported from OSS Reviewed By: mruberry Differential Revision: D35317441 Pulled By: dzdang fbshipit-source-id: ffb85b0e06ccbcc2b01052ca6760517684048b39
1 parent ed5cdb7 commit 2a54b8b

File tree

6 files changed

+91
-17
lines changed

6 files changed

+91
-17
lines changed

aten/src/ATen/native/native_functions.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1878,6 +1878,7 @@
18781878
MkldnnCPU: empty_mkldnn
18791879
SparseCPU, SparseCUDA: empty_sparse
18801880
SparseCsrCPU, SparseCsrCUDA: empty_sparse_csr
1881+
QuantizedCPU, QuantizedCUDA: empty_unknown_quantized
18811882

18821883
# We do not make new_empty a composite that calls into new_empty_strided, as the strided version
18831884
# is significantly more difficult to implement by different backends
@@ -1949,6 +1950,7 @@
19491950
CPU: empty_strided_cpu
19501951
CUDA: empty_strided_cuda
19511952
Meta: empty_strided_meta
1953+
QuantizedCPU, QuantizedCUDA: empty_strided_unknown_quantized
19521954

19531955
- func: erf(Tensor self) -> Tensor
19541956
device_check: NoCheck # TensorIterator

aten/src/ATen/native/quantized/TensorFactories.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,40 @@ Tensor empty_per_channel_affine_quantized(
6666
quantizer);
6767
}
6868

69+
// Allocates a quantized tensor with an "unknown" quantizer: the tensor is
// flagged as quantized, but the real quantization parameters are attached
// later by whichever kernel produces the tensor's values. This is what lets
// generic factories like torch.empty work on quantized backends.
Tensor empty_unknown_quantized(
    IntArrayRef size,
    c10::optional<ScalarType> dtype,
    c10::optional<Layout> layout,
    c10::optional<Device> device,
    c10::optional<bool> pin_memory,
    c10::optional<c10::MemoryFormat> optional_memory_format) {
  // See [Note: hacky wrapper removal for TensorOptions]
  TensorOptions base_options = TensorOptions()
                                   .dtype(dtype)
                                   .layout(layout)
                                   .device(device)
                                   .pinned_memory(pin_memory);
  // Memory format may come in either via TensorOptions or via the explicit
  // argument, but never both.
  TORCH_CHECK(
      !(base_options.has_memory_format() && optional_memory_format.has_value()),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");
  auto merged_options = base_options.merge_memory_format(optional_memory_format);
  TORCH_CHECK(
      merged_options.has_dtype(),
      "Must provide data type for Tensor creation functions.");
  // The unknown quantizer records only the scalar type; everything else is
  // deferred to the kernel that fills in the tensor.
  QuantizerPtr quantizer =
      make_unknown_quantizer(typeMetaToScalarType(merged_options.dtype()));
  return new_qtensor(size, merged_options, quantizer);
}
90+
91+
// Strided allocation is not implemented for quantized tensors. This stub is
// registered so the dispatcher raises a clear, actionable error instead of a
// generic missing-kernel message.
//
// Fixes vs. the previous version: the TORCH_CHECK statement was missing its
// terminating semicolon, and the issue URL in the error message pointed at a
// mirror host (github.com) instead of the upstream repository.
Tensor empty_strided_unknown_quantized(
    IntArrayRef size,
    IntArrayRef strided,
    c10::optional<ScalarType> dtype,
    c10::optional<Layout> layout,
    c10::optional<Device> device,
    c10::optional<bool> pin_memory) {
  TORCH_CHECK(false, "empty_strided not supported on quantized tensors yet see https://github.com/pytorch/pytorch/issues/74540");
}
102+
69103
// Provide better error message if dtype is wrong
70104
Tensor empty_affine_quantized_other_backends_stub(
71105
IntArrayRef,

aten/src/ATen/quantized/Quantizer.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,4 +417,23 @@ Tensor from_blob_quantized_per_channel_affine(
417417
return qtensor;
418418
}
419419

420+
Tensor UnknownQuantizer::quantize(const Tensor& tensor) {
421+
TORCH_INTERNAL_ASSERT(false, "cannot call quantize on UnknownQuantizer");
422+
}
423+
Tensor UnknownQuantizer::dequantize(const Tensor& qtensor) {
424+
TORCH_INTERNAL_ASSERT(false, "cannot call dequantize on UnknownQuantizer");
425+
}
426+
Tensor& UnknownQuantizer::dequantize_out(Tensor& rtensor, const Tensor& qtensor) {
427+
TORCH_INTERNAL_ASSERT(false, "cannot call dequantize_out on UnknownQuantizer");
428+
}
429+
QScheme UnknownQuantizer::qscheme() const {
430+
TORCH_INTERNAL_ASSERT(false, "cannot call qscheme on UnknownQuantizer");
431+
}
432+
bool UnknownQuantizer::equalTo(QuantizerPtr other) {
433+
TORCH_INTERNAL_ASSERT(false, "cannot call equalTo on UnknownQuantizer");
434+
}
435+
QuantizerPtr make_unknown_quantizer(ScalarType scalar_type) {
436+
return c10::make_intrusive<UnknownQuantizer>(scalar_type);
437+
}
438+
420439
} // namespace at

aten/src/ATen/quantized/Quantizer.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,23 @@
1818

1919
namespace at {
2020

21+
/**
22+
* UnknownQuantizer is a placeholder quantizer for functions that implement
23+
* quantization in a two step process. First a tensor is allocated but with
24+
* unknown quantizer, and then the quantization kernel decides what the final
25+
* quantizer will be.
26+
*/
27+
struct TORCH_API UnknownQuantizer : public Quantizer {
28+
explicit UnknownQuantizer(ScalarType scalar_type)
29+
: Quantizer(scalar_type) {}
30+
31+
Tensor quantize(const Tensor& tensor) override;
32+
Tensor dequantize(const Tensor& qtensor) override;
33+
Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
34+
QScheme qscheme() const override;
35+
bool equalTo(QuantizerPtr other) override;
36+
};
37+
2138
/**
2239
* UniformQuantizer is the parent class for all uniform quantizers.
2340
* These quantization scheme will map float value uniformly to
@@ -222,6 +239,8 @@ TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
222239
int64_t axis,
223240
ScalarType scalar_type);
224241

242+
TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
243+
225244
// Create a Quantized Tensor given arguments for normal Tensor and a quantizer
226245
TORCH_API Tensor new_qtensor(
227246
IntArrayRef sizes,

test/test_dispatch.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,5 +935,20 @@ def test_defaultbackend_math(self):
935935
r"Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed"):
936936
dispatcher.register(["CompositeExplicitAutograd", "CompositeImplicitAutograd"])
937937

938+
def test_quantized_structured_not_implemented(self):
939+
x = torch.zeros([1, 1, 1])
940+
y = torch.zeros([1, 1, 1])
941+
scale, zero_point = 1.0, 0
942+
dtype = torch.qint8
943+
qx = torch.quantize_per_tensor(x, scale, zero_point, dtype)
944+
qy = torch.quantize_per_tensor(y, scale, zero_point, dtype)
945+
# If bmm gets quantized support you need to update this to something
946+
# else that is not implemented
947+
self.assertRaisesRegex(
948+
NotImplementedError,
949+
"Could not run 'aten::bmm.out' with arguments from the 'QuantizedCPU' backend.",
950+
lambda: torch.bmm(qx, qy)
951+
)
952+
938953
if __name__ == '__main__':
939954
run_tests()

tools/codegen/dest/register_dispatch_key.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,30 +62,15 @@ def gen_create_out_helper(backend_index: BackendIndex) -> List[str]:
6262
dispatch = str(backend_index.dispatch_key).lower()
6363
empty_impl = f"at::detail::empty_{dispatch}"
6464
empty_strided_impl = f"at::detail::empty_strided_{dispatch}"
65-
runtime_empty_supported_check = ""
66-
elif backend_index.dispatch_key == DispatchKey.CompositeExplicitAutograd:
65+
elif backend_index.dispatch_key in (
66+
DispatchKey.CompositeExplicitAutograd, DispatchKey.QuantizedCPU, DispatchKey.QuantizedCUDA):
6767
empty_impl = "at::empty"
6868
empty_strided_impl = "at::empty_strided"
69-
runtime_empty_supported_check = """\
70-
if (!c10::detail::backend_supports_empty_operator(options)) {{
71-
// The main purpose of this CompositeExplicitAutograd kernel is to provide
72-
// a "free" implementation of out-of-place operators.
73-
// If a backend hasn't implemented an out-of-place op but has implemented
74-
// the out= variant, then this kernel will call their out= variant.
75-
// It does that by using at::empty() to create the tensor to pass to the out= variant though,
76-
// so this "default" kernel doesn't actually handle backends that don't support at::empty
77-
// (e.g. quantized backends).
78-
// Returning an undefined tensor here allows us to reach the out= kernel and give a better error.
79-
// Longer term, this could be better fixed by https://github.com/pytorch/pytorch/issues/52680
80-
return at::Tensor();
81-
}}
82-
"""
8369
else:
8470
return []
8571

8672
return [f"""
8773
Tensor create_out(IntArrayRef sizes, IntArrayRef strides, const TensorOptions &options) {{
88-
{runtime_empty_supported_check}
8974
if (strides.empty()) {{
9075
return {empty_impl}(sizes, {empty_options});
9176
}} else {{

0 commit comments

Comments
 (0)