
[reland] Unify Quantization APIs for add, pool and relu #26586

Closed · wants to merge 4 commits
2 changes: 0 additions & 2 deletions aten/src/ATen/Context.h
@@ -130,8 +130,6 @@ class CAFFE2_API Context {
   at::QEngine quantized_engine =
 #ifdef USE_FBGEMM
       at::kFBGEMM;
-#elif defined(USE_PYTORCH_QNNPACK)
-      at::kQNNPACK;
 #else
       at::kNoQEngine;
 #endif
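With the compile-time QNNPACK default gone from Context.h, engine selection happens at runtime. A minimal caller-side sketch, assuming the existing at::globalContext() engine accessors (setQEngine / supportedQEngines); the exact selection flow is outside this diff:

#include <ATen/Context.h>

// Opt in to QNNPACK when this build supports it; otherwise the default
// from Context.h above (kFBGEMM or kNoQEngine) stays in effect.
void prefer_qnnpack() {
  for (const auto engine : at::globalContext().supportedQEngines()) {
    if (engine == at::QEngine::QNNPACK) {
      at::globalContext().setQEngine(at::QEngine::QNNPACK);
      return;
    }
  }
}

Quantized kernels such as QAdd below consult qEngine() on every call, so the choice can be flipped between calls without rebuilding.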
76 changes: 76 additions & 0 deletions aten/src/ATen/native/quantized/cpu/qadd.cpp
@@ -5,6 +5,9 @@
 #include <ATen/native/cpu/Loops.h>
 #include <ATen/quantized/Quantizer.h>
 #include <ATen/native/quantized/cpu/quantized_ops.h>
+#include <ATen/native/quantized/cpu/init_qnnpack.h>
+#include <ATen/native/quantized/cpu/qnnpack_utils.h>
+#include <caffe2/utils/threadpool/ThreadPoolMobile.h>
 
 #include <algorithm>
 
@@ -67,9 +70,82 @@ Tensor _add_scalar_out(Tensor& out, const Tensor& self, Scalar other) {
 
 template <bool ReLUFused = false>
 class QAdd final : public c10::OperatorKernel {
+#ifdef USE_PYTORCH_QNNPACK
+  Tensor qnnpack_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
+    TORCH_CHECK(qa.ndimension() > 0, "qnnpack_add(): Got empty input tensor.");
+    Tensor qa_contig = qa.contiguous();
+    Tensor qb_contig = qb.contiguous();
+
+    const auto a_zero_point = qa_contig.q_zero_point();
+    const auto b_zero_point = qb_contig.q_zero_point();
+    const auto a_scale = qa_contig.q_scale();
+    const auto b_scale = qb_contig.q_scale();
+
+    Tensor qy = at::_empty_affine_quantized(
+        qa_contig.sizes(), at::device(kCPU).dtype(kQUInt8), scale, zero_point);
+
+    if (qa_contig.size(0) == 0) {
+      return qy;
+    }
+
+    initQNNPACK();
+
+    pytorch_qnnp_operator_t qnnpack_operator{nullptr};
+
+    size_t num_elems = qa_contig.numel() / qa_contig.size(0);
+
+    const pytorch_qnnp_status createStatus = pytorch_qnnp_create_add_nc_q8(
+        num_elems /* input size */,
+        a_zero_point /* a zero_point */,
+        a_scale /* a scale */,
+        b_zero_point /* b zero_point */,
+        b_scale /* b scale */,
+        static_cast<uint8_t>(zero_point) /* sum zero_point */,
+        scale /* sum scale */,
+        std::numeric_limits<uint8_t>::min() /* output min */,
+        std::numeric_limits<uint8_t>::max() /* output max */,
+        0 /* flags */,
+        &qnnpack_operator);
+
+    TORCH_INTERNAL_ASSERT(
+        createStatus == pytorch_qnnp_status_success,
+        "failed to create QNNPACK Add operator");
+
+    std::unique_ptr<pytorch_qnnp_operator, QnnpackOperatorDeleter>
+        qnnpack_uniq_ptr(qnnpack_operator);
+
+    const pytorch_qnnp_status setupStatus = pytorch_qnnp_setup_add_nc_q8(
+        qnnpack_operator /* add op */,
+        qa_contig.size(0) /* batch size */,
+        (uint8_t*)qa_contig.data_ptr<c10::quint8>() /* a data */,
+        num_elems /* A stride */,
+        (uint8_t*)qb_contig.data_ptr<c10::quint8>() /* b data */,
+        num_elems /* B stride */,
+        (uint8_t*)qy.data_ptr<c10::quint8>() /* output data */,
+        num_elems /* sum stride */);
+    TORCH_INTERNAL_ASSERT(
+        setupStatus == pytorch_qnnp_status_success,
+        "failed to setup QNNPACK Add operator");
+
+    pthreadpool_t threadpool = caffe2::mobile_threadpool();
+    const pytorch_qnnp_status runStatus =
+        pytorch_qnnp_run_operator(qnnpack_operator, threadpool);
+
+    TORCH_INTERNAL_ASSERT(
+        runStatus == pytorch_qnnp_status_success,
+        "failed to run QNNPACK Add operator");
+
+    return qy;
+  }
+#endif
  public:
   Tensor operator()(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
     check_inputs(qa, qb);
+#ifdef USE_PYTORCH_QNNPACK
+    if (at::globalContext().qEngine() == at::QEngine::QNNPACK) {
+      return qnnpack_add(qa, qb, scale, zero_point);
+    }
+#endif
     auto qc = at::_empty_affine_quantized(
         qa.sizes(),
         at::device(kCPU).dtype(qa.scalar_type()),
 …
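The stride arithmetic in qnnpack_add() flattens everything after the leading dimension. A small standalone sketch of that shape handling (shape_demo is a hypothetical helper; _empty_affine_quantized is called exactly as in the kernel above):

#include <ATen/ATen.h>

// A {2, 3, 4} tensor is presented to pytorch_qnnp_create_add_nc_q8 as
// 2 rows ("batch") of 12 contiguous uint8 values ("channels"), and the
// same value 12 serves as the row stride for a, b, and the sum.
void shape_demo() {
  at::Tensor qa = at::_empty_affine_quantized(
      {2, 3, 4}, at::device(at::kCPU).dtype(at::kQUInt8),
      /*scale=*/0.1, /*zero_point=*/0);
  const size_t batch = qa.size(0);                  // 2
  const size_t num_elems = qa.numel() / qa.size(0); // 12 == 3 * 4
  (void)batch;
  (void)num_elems;
}

This is why the kernel only needs contiguous inputs: once contiguous, any N-dimensional tensor reduces to the batch-by-channels layout QNNPACK's elementwise add expects.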
105 changes: 0 additions & 105 deletions aten/src/ATen/native/quantized/cpu/qnnpack_add.cpp

This file was deleted.

155 changes: 0 additions & 155 deletions aten/src/ATen/native/quantized/cpu/qnnpack_maxpool.cpp

This file was deleted.
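With the engine check folded into the shared kernels, these standalone QNNPACK translation units become redundant. For context, a hedged sketch of how one functor covers both the plain and ReLU-fused ops via the ReLUFused template flag, following the c10 registration style of this era (the schema strings and dispatch key are illustrative, not quoted from the PR):

static auto registry = c10::RegisterOperators()
    .op("quantized::add(Tensor qa, Tensor qb, float scale, int zero_point) -> Tensor qc",
        c10::RegisterOperators::options()
            .kernel<QAdd</*ReLUFused=*/false>>(TensorTypeId::QuantizedCPUTensorId))
    .op("quantized::add_relu(Tensor qa, Tensor qb, float scale, int zero_point) -> Tensor qc",
        c10::RegisterOperators::options()
            .kernel<QAdd</*ReLUFused=*/true>>(TensorTypeId::QuantizedCPUTensorId));

Both registrations share the QNNPACK branch, so add, pooling, and relu each need one source file per op rather than one per engine.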
