[Quantization][PrivateUse1] Adding more support QuantizedPrivateuse1 backends (#139860)

Here are some explanations of this PR.

1. Changes in `aten/src/ATen/core/Tensor.cpp` and `c10/core/DispatchKey.cpp`: Support toString method for `QuantizedPrivateUse1` backend, make pytorch print out correct backend string for it.
2. Add header `DispatchStub.h` in `aten/src/ATen/native/quantized/IndexKernel.h`: If this header is not included, we can't utilize `masked_fill_kernel_quantized_stub` even if we include this `IndexKernel.h` header; it would throw an error during compilation.
3. Add multiple `TORCH_API`s in `aten/src/ATen/native/quantized/AffineQuantizer.h`: these functions are useful for other privateuse1 backends supporting quantization functions; if these `TORCH_API` markers are missing, it would throw an error at runtime (undefined symbol).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139860
Approved by: https://github.com/bdhirsh
This commit is contained in:
fan.mo
2024-11-18 05:09:56 +00:00
committed by PyTorch MergeBot
parent 1d5a8ee8fb
commit 43edb94f8a
4 changed files with 11 additions and 6 deletions

View File

@ -89,6 +89,8 @@ std::string TensorBase::toString() const {
dispatchkey_str = c10::get_privateuse1_backend();
} else if (dispatchkey == c10::DispatchKey::AutocastPrivateUse1) {
dispatchkey_str = "Autocast" + c10::get_privateuse1_backend();
} else if (dispatchkey == c10::DispatchKey::QuantizedPrivateUse1) {
dispatchkey_str = "Quantized" + c10::get_privateuse1_backend();
} else {
dispatchkey_str = at::toString(dispatchkey);
}

View File

@ -8,37 +8,37 @@
namespace at {
namespace native {
Tensor& quantize_tensor_per_tensor_affine(
TORCH_API Tensor& quantize_tensor_per_tensor_affine(
const Tensor& rtensor,
Tensor& qtensor,
double scale,
int64_t zero_point);
Tensor& quantize_tensor_per_channel_affine(
TORCH_API Tensor& quantize_tensor_per_channel_affine(
const Tensor& rtensor,
Tensor& qtensor,
const Tensor& scales,
Tensor zero_points,
int64_t axis);
Tensor& quantize_tensor_per_channel_float_qparams(
TORCH_API Tensor& quantize_tensor_per_channel_float_qparams(
const Tensor& rtensor,
Tensor& qtensor,
const Tensor& scales,
const Tensor& zero_points,
int64_t axis);
Tensor& dequantize_tensor_per_tensor_affine(
TORCH_API Tensor& dequantize_tensor_per_tensor_affine(
const Tensor& qtensor,
Tensor& rtensor,
double scale,
int64_t zero_point);
Tensor& dequantize_tensor_per_channel_affine(
TORCH_API Tensor& dequantize_tensor_per_channel_affine(
const Tensor& qtensor,
Tensor& rtensor,
const Tensor& scales,
Tensor zero_points,
int64_t axis);
Tensor& dequantize_tensor_per_channel_float_qparams(
TORCH_API Tensor& dequantize_tensor_per_channel_float_qparams(
const Tensor& qtensor,
Tensor& rtensor,
const Tensor& scales,

View File

@ -1,4 +1,5 @@
#pragma once
#include <ATen/native/DispatchStub.h>
#include <ATen/native/TensorIterator.h>
namespace at {

View File

@ -84,6 +84,8 @@ const char* toString(DispatchKey t) {
case DispatchKey::Quantized:
return "Quantized";
case DispatchKey::QuantizedPrivateUse1:
return "QuantizedPrivateUse1";
case DispatchKey::CustomRNGKeyId:
return "CustomRNGKeyId";
case DispatchKey::MkldnnCPU: