Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "[Quant][CPU] fix fake_quantize_per_tensor_affine of inf values (#155109)"
This reverts commit e375d21bb9b0ef6fefe7a8af5a054a17de8c63c9. Reverted https://github.com/pytorch/pytorch/pull/155109 on behalf of https://github.com/malfet due to Looks like it broke ROCM tests ([comment](https://github.com/pytorch/pytorch/pull/155109#issuecomment-2977428354))
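For context, the change being reverted addressed https://github.com/pytorch/pytorch/issues/154328: fake-quantizing a tensor that contains inf. The values below come straight from the test removed by this revert; this is a minimal repro sketch, assuming the fix from #155109 is applied. With the fix reverted, the kernel casts inf to int64 before clamping, which is undefined behavior in C++ and yields platform-dependent output.

    import torch

    # Minimal repro for the inf case; values taken from the removed test.
    x = torch.tensor([float("inf")])
    scale, zero_point, quant_min, quant_max = 0.01, 0, 0, 255

    out = torch.fake_quantize_per_tensor_affine(
        x, scale, zero_point, quant_min, quant_max
    )
    # With inf clamped to quant_max before the integer cast:
    # (255 - 0) * 0.01 == 2.55
    print(out)  # tensor([2.5500]) when the #155109 fix is in place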
@@ -2699,11 +2699,10 @@ void _fake_quantize_tensor_helper(
         bool* mask_val = (bool*)(data[1] + i * strides[1]);
         scalar_t* input_val = (scalar_t*)(data[2] + i * strides[2]);

+        const auto qval = static_cast<int64_t>(z_point + std::nearbyint(*input_val * inv_scale));
         if (fake_quant_on) {
-          auto qval_f = z_point + std::nearbyint(*input_val * inv_scale);
-          const auto qval = static_cast<int64_t>(std::fmin(std::fmax(qval_f, quant_min), quant_max));
-          *output_val = (qval - z_point) * sc;
-          *mask_val = ((quant_min <= qval_f) && (qval_f <= quant_max));
+          *output_val = (std::fmin(std::fmax(qval, quant_min), quant_max) - z_point) * sc;
+          *mask_val = ((quant_min <= qval) && (qval <= quant_max));
         } else {
           *output_val = *input_val;
           *mask_val = 1;
@@ -2719,11 +2718,10 @@ void _fake_quantize_tensor_helper(
         bool* mask_val = (bool*)(data[1] + i * strides[1]);
         scalar_t* input_val = (scalar_t*)(data[2] + i * strides[2]);

+        const auto qval = static_cast<int64_t>(z_point + std::nearbyint(*input_val * inv_scale));
         if (fake_quant_on) {
-          auto qval_f = z_point + std::nearbyint(*input_val * inv_scale);
-          const auto qval = static_cast<int64_t>(std::fmin(std::fmax(qval_f, quant_min), quant_max));
-          *output_val = (qval - z_point) * sc;
-          *mask_val = ((quant_min <= qval_f) && (qval_f <= quant_max));
+          *output_val = (std::fmin(std::fmax(qval, quant_min), quant_max) - z_point) * sc;
+          *mask_val = ((quant_min <= qval) && (qval <= quant_max));
         } else {
           *output_val = *input_val;
           *mask_val = 1;
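Both hunks make the same change, so one sketch covers them. The reverted fix computed the quantized value as a float (qval_f), clamped it to [quant_min, quant_max], and only then cast to int64; the restored code casts first and clamps after. Since std::nearbyint(inf) is still inf, the cast-first ordering hands inf to static_cast<int64_t>, which is undefined behavior in C++ (on x86 it commonly produces INT64_MIN). A minimal Python sketch of the two orderings, with Python floats standing in for scalar_t; where C++ silently produces an unspecified value, Python raises:

    import math

    # Variable names mirror the kernel diff above. std::nearbyint(inf)
    # stays inf, so the rounding step is elided here.
    inv_scale, sc = 1.0 / 0.01, 0.01
    z_point, quant_min, quant_max = 0, 0, 255
    x = math.inf

    # Ordering from #155109 (removed by this revert): clamp in floating
    # point first, so inf saturates to quant_max before the integer cast.
    qval_f = z_point + x * inv_scale                     # inf
    qval = int(min(max(qval_f, quant_min), quant_max))   # 255
    print((qval - z_point) * sc)                         # ~2.55

    # Restored (pre-fix) ordering: cast first, clamp after. In C++,
    # static_cast<int64_t>(inf) is undefined behavior; Python raises
    # instead of returning garbage.
    try:
        qval = int(z_point + x * inv_scale)
    except OverflowError as e:
        print("cast before clamp:", e)  # cannot convert float infinity to integer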
@@ -1038,21 +1038,6 @@ class TestFakeQuantizeOps(TestCase):
             input, scale, zero_point, axis, quant_min, quant_max
         )

-    @skipIfTorchDynamo("Not a suitable test for TorchDynamo")
-    @given(dtype=st.sampled_from([torch.float, torch.float64, torch.half, torch.bfloat16]),
-           device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']))
-    def test_fake_quantize_per_tensor_affine_inf(self, dtype, device) -> None:
-        # https://github.com/pytorch/pytorch/issues/154328
-        input_tensor = torch.tensor([torch.inf], dtype=dtype).to(device)
-        scale = 0.01
-        zero_point = 0
-        quant_min = 0
-        quant_max = 255
-        result = torch.fake_quantize_per_tensor_affine(input_tensor, scale, zero_point, quant_min, quant_max)
-        ref_result = (min(quant_max, max(quant_min, torch.round(input_tensor / scale) + zero_point)) - zero_point) * scale
-        ref_result = torch.Tensor([ref_result]).to(dtype).to(device)
-        self.assertEqual(result, ref_result)
-

 class TestFusedObsFakeQuant(TestCase):
     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
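A side note on the removed test's reference computation: it applies Python's built-in min/max to a one-element tensor, which only works because bool() is defined for single-element tensors. A shape-agnostic equivalent using torch.clamp, as a sketch (the helper name ref_fake_quant is hypothetical, not part of the test):

    import torch

    def ref_fake_quant(x, scale, zero_point, quant_min, quant_max):
        # Quantize, clamp to the integer range, then dequantize.
        q = torch.round(x / scale) + zero_point
        q = torch.clamp(q, quant_min, quant_max)
        return (q - zero_point) * scale

    x = torch.tensor([float("inf"), 1.0, -float("inf")])
    print(ref_fake_quant(x, 0.01, 0, 0, 255))  # tensor([2.5500, 1.0000, 0.0000])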