From e9fdaf8701b599fd943bb899639b5e8a4966b3c3 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
Date: Mon, 16 Jun 2025 17:22:53 +0000
Subject: [PATCH] Revert "[Quant][CPU] fix fake_quantize_per_tensor_affine of inf values (#155109)"

This reverts commit e375d21bb9b0ef6fefe7a8af5a054a17de8c63c9.

Reverted https://github.com/pytorch/pytorch/pull/155109 on behalf of https://github.com/malfet due to Looks like it broke ROCM tests ([comment](https://github.com/pytorch/pytorch/pull/155109#issuecomment-2977428354))
---
 .../quantized/cpu/kernels/QuantizedOpKernels.cpp | 14 ++++++--------
 test/quantization/core/test_workflow_ops.py      | 15 ---------------
 2 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
index 4c0c532ed778..1ab272e86c15 100644
--- a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
+++ b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
@@ -2699,11 +2699,10 @@ void _fake_quantize_tensor_helper(
         bool* mask_val = (bool*)(data[1] + i * strides[1]);
         scalar_t* input_val = (scalar_t*)(data[2] + i * strides[2]);
 
+        const auto qval = static_cast<int64_t>(z_point + std::nearbyint(*input_val * inv_scale));
         if (fake_quant_on) {
-          auto qval_f = z_point + std::nearbyint(*input_val * inv_scale);
-          const auto qval = static_cast<int64_t>(std::fmin(std::fmax(qval_f, quant_min), quant_max));
-          *output_val = (qval - z_point) * sc;
-          *mask_val = ((quant_min <= qval_f) && (qval_f <= quant_max));
+          *output_val = (std::fmin(std::fmax(qval, quant_min), quant_max) - z_point) * sc;
+          *mask_val = ((quant_min <= qval) && (qval <= quant_max));
         } else {
           *output_val = *input_val;
           *mask_val = 1;
@@ -2719,11 +2718,10 @@
         bool* mask_val = (bool*)(data[1] + i * strides[1]);
         scalar_t* input_val = (scalar_t*)(data[2] + i * strides[2]);
 
+        const auto qval = static_cast<int64_t>(z_point + std::nearbyint(*input_val * inv_scale));
         if (fake_quant_on) {
-          auto qval_f = z_point + std::nearbyint(*input_val * inv_scale);
-          const auto qval = static_cast<int64_t>(std::fmin(std::fmax(qval_f, quant_min), quant_max));
-          *output_val = (qval - z_point) * sc;
-          *mask_val = ((quant_min <= qval_f) && (qval_f <= quant_max));
+          *output_val = (std::fmin(std::fmax(qval, quant_min), quant_max) - z_point) * sc;
+          *mask_val = ((quant_min <= qval) && (qval <= quant_max));
         } else {
           *output_val = *input_val;
           *mask_val = 1;
diff --git a/test/quantization/core/test_workflow_ops.py b/test/quantization/core/test_workflow_ops.py
index f182e88d4fd4..36f112949bf1 100644
--- a/test/quantization/core/test_workflow_ops.py
+++ b/test/quantization/core/test_workflow_ops.py
@@ -1038,21 +1038,6 @@ class TestFakeQuantizeOps(TestCase):
             input, scale, zero_point, axis, quant_min, quant_max
         )
 
-    @skipIfTorchDynamo("Not a suitable test for TorchDynamo")
-    @given(dtype=st.sampled_from([torch.float, torch.float64, torch.half, torch.bfloat16]),
-           device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']))
-    def test_fake_quantize_per_tensor_affine_inf(self, dtype, device) -> None:
-        # https://github.com/pytorch/pytorch/issues/154328
-        input_tensor = torch.tensor([torch.inf], dtype=dtype).to(device)
-        scale = 0.01
-        zero_point = 0
-        quant_min = 0
-        quant_max = 255
-        result = torch.fake_quantize_per_tensor_affine(input_tensor, scale, zero_point, quant_min, quant_max)
-        ref_result = (min(quant_max, max(quant_min, torch.round(input_tensor / scale) + zero_point)) - zero_point) * scale
-        ref_result = torch.Tensor([ref_result]).to(dtype).to(device)
-        self.assertEqual(result, ref_result)
-
 class TestFusedObsFakeQuant(TestCase):
     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
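Editor's note, not part of the patch: with the revert applied, the CPU kernel again casts z_point + std::nearbyint(*input_val * inv_scale) to int64_t before clamping. For an inf input that conversion is undefined behavior in C++, so the output need not match the saturating reference the removed test checked against. Below is a minimal Python sketch of that case, reusing the constants from the removed test; torch.clamp stands in for the test's min/max arithmetic.

# Sketch of the inf case from https://github.com/pytorch/pytorch/issues/154328
import torch

x = torch.tensor([float("inf")], dtype=torch.float32)
scale, zero_point, quant_min, quant_max = 0.01, 0, 0, 255

# Actual kernel output for an inf input.
out = torch.fake_quantize_per_tensor_affine(x, scale, zero_point, quant_min, quant_max)

# Saturating reference: clamp in floating point first, then dequantize,
# so inf maps to (quant_max - zero_point) * scale = 2.55.
ref = (torch.clamp(torch.round(x / scale) + zero_point, quant_min, quant_max) - zero_point) * scale

print(out, ref)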