From cca923c4811dbc00acfe28ac67643a90b723236c Mon Sep 17 00:00:00 2001
From: Jerry Zhang
Date: Tue, 21 May 2019 12:15:44 -0700
Subject: [PATCH] Add dequantize_linear for JIT pass (#20107)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/20107

att

Reviewed By: nishantpdce

Differential Revision: D15202187

fbshipit-source-id: 7d6274a67fcca695c0425587f35046fecbc2ccdc
---
 aten/src/ATen/Dispatch.h                   | 22 +++++++++++-----
 aten/src/ATen/core/Tensor.h                |  1 +
 aten/src/ATen/core/TensorMethods.h         |  3 +++
 aten/src/ATen/core/Type.h                  |  1 +
 aten/src/ATen/native/native_functions.yaml |  5 ++++
 aten/src/ATen/native/quantized/QTensor.cpp | 15 +++++++++++
 c10/core/ScalarType.h                      | 30 ++++++++++++++++++++++
 docs/source/tensors.rst                    |  1 +
 test/test_torch.py                         |  6 +++++
 torch/_tensor_docs.py                      |  9 +++++++
 10 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/aten/src/ATen/Dispatch.h b/aten/src/ATen/Dispatch.h
index 9542fcf1cfde..c116837c36d2 100644
--- a/aten/src/ATen/Dispatch.h
+++ b/aten/src/ATen/Dispatch.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <...>
+#include <c10/macros/Macros.h>
 #include <...>
 #include <...>
 #include <...>
@@ -11,6 +12,13 @@
     return __VA_ARGS__();                                                 \
   }
 
+#define AT_QINT_PRIVATE_CASE_TYPE(enum_type, type, underlying_type, ...)  \
+  case enum_type: {                                                       \
+    using scalar_t C10_UNUSED = type;                                     \
+    using underlying_t C10_UNUSED = underlying_type;                      \
+    return __VA_ARGS__();                                                 \
+  }
+
 namespace detail {
 
 template <...>
@@ -211,14 +219,14 @@ inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX() {}
 
 #define AT_DISPATCH_QINT_TYPES(TYPE, NAME, ...)                           \
   [&] {                                                                   \
     switch (TYPE) {                                                       \
-      AT_PRIVATE_CASE_TYPE(                                               \
-          at::ScalarType::QInt8, qint8, __VA_ARGS__)                      \
-      AT_PRIVATE_CASE_TYPE(                                               \
-          at::ScalarType::QUInt8, quint8, __VA_ARGS__)                    \
-      AT_PRIVATE_CASE_TYPE(                                               \
-          at::ScalarType::QInt32, qint32, __VA_ARGS__)                    \
+      AT_QINT_PRIVATE_CASE_TYPE(                                          \
+          at::ScalarType::QInt8, qint8, int8_t, __VA_ARGS__)              \
+      AT_QINT_PRIVATE_CASE_TYPE(                                          \
+          at::ScalarType::QUInt8, quint8, uint8_t, __VA_ARGS__)           \
+      AT_QINT_PRIVATE_CASE_TYPE(                                          \
+          at::ScalarType::QInt32, qint32, int, __VA_ARGS__)               \
       default:                                                            \
-        AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");   \
+        AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");  \
     }                                                                     \
   }()
diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h
index 8465ce47c5b0..55e80aa630f3 100644
--- a/aten/src/ATen/core/Tensor.h
+++ b/aten/src/ATen/core/Tensor.h
@@ -583,6 +583,7 @@ class CAFFE2_API Tensor {
   Tensor to_mkldnn() const;
   Tensor quantize_linear(double scale, int64_t zero_point, ScalarType dtype) const;
   Tensor dequantize() const;
+  Tensor dequantize_linear(double scale, int64_t zero_point, ScalarType dtype) const;
   Scalar q_scale() const;
   Scalar q_zero_point() const;
   Tensor int_repr() const;
diff --git a/aten/src/ATen/core/TensorMethods.h b/aten/src/ATen/core/TensorMethods.h
index 217ecb7e8a20..25ce96c9b749 100644
--- a/aten/src/ATen/core/TensorMethods.h
+++ b/aten/src/ATen/core/TensorMethods.h
@@ -804,6 +804,9 @@ inline Tensor Tensor::quantize_linear(double scale, int64_t zero_point, ScalarTy
 inline Tensor Tensor::dequantize() const {
     return dispatch_type().dequantize(*this);
 }
+inline Tensor Tensor::dequantize_linear(double scale, int64_t zero_point, ScalarType dtype) const {
+    return dispatch_type().dequantize_linear(*this, scale, zero_point, dtype);
+}
 inline Scalar Tensor::q_scale() const {
     return dispatch_type().q_scale(*this);
 }
diff --git a/aten/src/ATen/core/Type.h b/aten/src/ATen/core/Type.h
index 234425584fa9..ff1bb03e7e55 100644
--- a/aten/src/ATen/core/Type.h
+++ b/aten/src/ATen/core/Type.h
@@ -393,6 +393,7 @@ struct CAFFE2_API Type {
   virtual Tensor to_mkldnn(const Tensor & self) const = 0;
   virtual Tensor quantize_linear(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype) const = 0;
   virtual Tensor dequantize(const Tensor & self) const = 0;
+  virtual Tensor dequantize_linear(const Tensor & self, double scale, int64_t zero_point, ScalarType dtype) const = 0;
   virtual Scalar q_scale(const Tensor & self) const = 0;
   virtual Scalar q_zero_point(const Tensor & self) const = 0;
   virtual Tensor int_repr(const Tensor & self) const = 0;
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 73ecd600475a..8500568e4570 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -2551,6 +2551,11 @@
   dispatch:
     QuantizedCPU: dequantize_quant
 
+- func: dequantize_linear(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
+  variants: function, method
+  dispatch:
+    CPU: dequantize_linear_cpu
+
 - func: q_scale(Tensor self) -> Scalar
   variants: function, method
   dispatch:
diff --git a/aten/src/ATen/native/quantized/QTensor.cpp b/aten/src/ATen/native/quantized/QTensor.cpp
index ef4c7eb686d7..2a93e78f791f 100644
--- a/aten/src/ATen/native/quantized/QTensor.cpp
+++ b/aten/src/ATen/native/quantized/QTensor.cpp
@@ -16,6 +16,21 @@ Tensor dequantize_quant(const Tensor& self) {
   return get_qtensorimpl(self)->quantizer()->dequantize(self);
 }
 
+Tensor dequantize_linear_cpu(const Tensor& self, double scale, int64_t zero_point, ScalarType dtype) {
+  AT_CHECK(isQIntType(toQIntType(self.scalar_type())),
+           "Scalar type for quantized Tensor must have same underlying type as input.");
+  AT_CHECK(dtype == ScalarType::Float, "ScalarType for target Tensor must be float.");
+  Tensor f = at::empty(self.sizes(), self.options().dtype(dtype));
+  AT_DISPATCH_QINT_TYPES(
+      toQIntType(self.scalar_type()), "dequantize_linear_cpu", [&]() {
+        underlying_t* qdata = self.data<underlying_t>();
+        auto* fdata = f.data<float>();
+        for (int i = 0; i < self.numel(); ++i) {
+          fdata[i] = (static_cast<float>(qdata[i]) - zero_point) * scale;
+        }});
+  return f;
+}
+
 Scalar q_scale_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   AT_ASSERT(quantizer->qscheme() == kPerTensorAffine);
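
For reference, the loop in dequantize_linear_cpu above applies the standard affine dequantization formula, f[i] = (q[i] - zero_point) * scale, elementwise. The following pure-Python reference shows the same arithmetic for illustration only; dequantize_linear_ref is a hypothetical helper, not part of this patch:

    # Elementwise math performed by dequantize_linear_cpu:
    #   float_val = (int_val - zero_point) * scale
    def dequantize_linear_ref(int_values, scale, zero_point):
        return [(float(q) - zero_point) * scale for q in int_values]

    # With scale=3 and zero_point=2 (the values used by the new test below),
    # the int8 value -10 maps to (-10 - 2) * 3 = -36.0.
    print(dequantize_linear_ref([-10, 0, 9], 3, 2))  # [-36.0, -6.0, 21.0]
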
diff --git a/c10/core/ScalarType.h b/c10/core/ScalarType.h
index d07a0ad6a9cc..6cb2102b2b0c 100644
--- a/c10/core/ScalarType.h
+++ b/c10/core/ScalarType.h
@@ -234,6 +234,36 @@ static inline bool isQIntType(ScalarType t) {
   return t == ScalarType::QInt8 || t == ScalarType::QUInt8 || t == ScalarType::QInt32;
 }
 
+static inline ScalarType toQIntType(ScalarType t) {
+  switch (t) {
+    case ScalarType::Byte:
+      return ScalarType::QUInt8;
+    case ScalarType::Char:
+      return ScalarType::QInt8;
+    case ScalarType::Int:
+      return ScalarType::QInt32;
+    default:
+      return t;
+  }
+}
+
+static inline ScalarType toUnderlying(ScalarType t) {
+  switch (t) {
+    case ScalarType::QUInt8:
+      return ScalarType::Byte;
+    case ScalarType::QInt8:
+      return ScalarType::Char;
+    case ScalarType::QInt32:
+      return ScalarType::Int;
+    default:
+      return t;
+  }
+}
+
+static inline bool isUnderlying(ScalarType type, ScalarType qtype) {
+  return type == toUnderlying(qtype);
+}
+
 static inline ScalarType promoteTypes(ScalarType a, ScalarType b) {
   // This is generated according to NumPy's promote_types
   constexpr auto u1 = ScalarType::Byte;
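
The toQIntType/toUnderlying/isUnderlying helpers above encode a fixed correspondence: Byte <-> QUInt8, Char <-> QInt8, Int <-> QInt32. Below is a sketch of the same mapping at the Python level, assuming a torch build that exposes the quantized dtypes; TO_QINT, TO_UNDERLYING, and is_underlying are hypothetical names, not part of this patch:

    import torch

    # Mirrors toQIntType / toUnderlying / isUnderlying in c10/core/ScalarType.h.
    TO_QINT = {torch.uint8: torch.quint8,
               torch.int8: torch.qint8,
               torch.int32: torch.qint32}
    TO_UNDERLYING = {q: u for u, q in TO_QINT.items()}

    def is_underlying(dtype, qdtype):
        return TO_UNDERLYING.get(qdtype) == dtype

    print(is_underlying(torch.int8, torch.qint8))  # True
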
diff --git a/docs/source/tensors.rst b/docs/source/tensors.rst
index 2460c4552ada..a86beffabbf2 100644
--- a/docs/source/tensors.rst
+++ b/docs/source/tensors.rst
@@ -209,6 +209,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: cumsum
    .. automethod:: data_ptr
    .. automethod:: dequantize
+   .. automethod:: dequantize_linear
    .. automethod:: det
    .. automethod:: dense_dim
    .. automethod:: detach
diff --git a/test/test_torch.py b/test/test_torch.py
index 20ae72c8ff1c..7a7d0de1f6bd 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -2804,6 +2804,12 @@ class _TestTorchMixin(object):
         rqr = qr.dequantize()
         self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
 
+    def test_qtensor_dequantize_linear(self):
+        t = torch.arange(-10, 10, dtype=torch.int8)
+        scale = 3
+        zero_point = 2
+        qt = torch.dequantize_linear(t, scale, zero_point, torch.float)
+
     @unittest.skipIf(torch.cuda.device_count() < 2, 'fewer than 2 GPUs detected')
     def test_device_guard(self):
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
index 44298e710874..6e39e36dfcc9 100644
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -3031,6 +3031,15 @@ det() -> Tensor
 
 See :func:`torch.det`
 """)
 
+add_docstr_all('dequantize_linear',
+               r"""
+dequantize_linear(int_tensor, scale, zero_point, dtype) -> Tensor
+
+Dequantizes an int Tensor that holds quantized data, using an affine
+quantization scheme with the given ``scale`` and ``zero_point``.
+Returns a float Tensor.
+""")
+
 add_docstr_all('where',
                r"""
 where(condition, y) -> Tensor
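
Taken together, the pieces above let the new op be exercised end to end. A usage sketch, assuming a PyTorch build that contains this patch (the new test only smoke-tests the call; the values below follow from the affine formula):

    import torch

    t = torch.arange(-10, 10, dtype=torch.int8)
    ft = torch.dequantize_linear(t, 3, 2, torch.float)
    print(ft.dtype)      # torch.float32
    print(ft[0].item())  # (-10 - 2) * 3 = -36.0
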