[Quantization][PrivateUse1] Adding more support QuantizedPrivateuse1 backends (#139860)

Here are some explanations of this PR.

1. Changes in `aten/src/ATen/core/Tensor.cpp` and `c10/core/DispatchKey.cpp`: Support toString method for `QuantizedPrivateUse1` backend, make pytorch print out correct backend string for it.
2. Add header `DispatchStub.h` in `aten/src/ATen/native/quantized/IndexKernel.h`: If this header is not included, we can't utilize `masked_fill_kernel_quantized_stub` even if we include this `IndexKernel.h` header; it would throw an error during compilation.
3. Add multiple `TORCH_API`s in `aten/src/ATen/native/quantized/AffineQuantizer.h`: these functions are useful for other privateuse1 backends supporting quantization functions; if these `TORCH_API` markers are missing, it would throw an error at runtime (undefined symbol).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139860
Approved by: https://github.com/bdhirsh
This commit is contained in:
fan.mo
2024-11-18 05:09:56 +00:00
committed by PyTorch MergeBot
parent 1d5a8ee8fb
commit 43edb94f8a
4 changed files with 11 additions and 6 deletions

View File

@ -89,6 +89,8 @@ std::string TensorBase::toString() const {
dispatchkey_str = c10::get_privateuse1_backend();
} else if (dispatchkey == c10::DispatchKey::AutocastPrivateUse1) {
dispatchkey_str = "Autocast" + c10::get_privateuse1_backend();
} else if (dispatchkey == c10::DispatchKey::QuantizedPrivateUse1) {
dispatchkey_str = "Quantized" + c10::get_privateuse1_backend();
} else {
dispatchkey_str = at::toString(dispatchkey);
}

View File

@ -8,37 +8,37 @@
namespace at {
namespace native {
Tensor& quantize_tensor_per_tensor_affine(
TORCH_API Tensor& quantize_tensor_per_tensor_affine(
const Tensor& rtensor,
Tensor& qtensor,
double scale,
int64_t zero_point);
Tensor& quantize_tensor_per_channel_affine(
TORCH_API Tensor& quantize_tensor_per_channel_affine(
const Tensor& rtensor,
Tensor& qtensor,
const Tensor& scales,
Tensor zero_points,
int64_t axis);
Tensor& quantize_tensor_per_channel_float_qparams(
TORCH_API Tensor& quantize_tensor_per_channel_float_qparams(
const Tensor& rtensor,
Tensor& qtensor,
const Tensor& scales,
const Tensor& zero_points,
int64_t axis);
Tensor& dequantize_tensor_per_tensor_affine(
TORCH_API Tensor& dequantize_tensor_per_tensor_affine(
const Tensor& qtensor,
Tensor& rtensor,
double scale,
int64_t zero_point);
Tensor& dequantize_tensor_per_channel_affine(
TORCH_API Tensor& dequantize_tensor_per_channel_affine(
const Tensor& qtensor,
Tensor& rtensor,
const Tensor& scales,
Tensor zero_points,
int64_t axis);
Tensor& dequantize_tensor_per_channel_float_qparams(
TORCH_API Tensor& dequantize_tensor_per_channel_float_qparams(
const Tensor& qtensor,
Tensor& rtensor,
const Tensor& scales,

View File

@ -1,4 +1,5 @@
#pragma once
#include <ATen/native/DispatchStub.h>
#include <ATen/native/TensorIterator.h>
namespace at {

View File

@ -84,6 +84,8 @@ const char* toString(DispatchKey t) {
case DispatchKey::Quantized:
return "Quantized";
case DispatchKey::QuantizedPrivateUse1:
return "QuantizedPrivateUse1";
case DispatchKey::CustomRNGKeyId:
return "CustomRNGKeyId";
case DispatchKey::MkldnnCPU: