bf16 support for per tensor backward (#165362)

Adding bf16 for the backward pass of `torch._fake_quantize_learnable_per_tensor_affine()`.

Note that for testing, we modified the seed to avoid increasing tolerance due to cases where difference in Python vs CPP downcasting causes tensor mismatches. (e.g. 27.87704 vs  27.8408 before downcasting, 27.7500 vs 27.8750 after downcasting for Python vs CPP op)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165362
Approved by: https://github.com/andrewor14
This commit is contained in:
Angel Li
2025-10-14 09:40:23 -07:00
committed by PyTorch MergeBot
parent 85586d7efc
commit fe5ccb1a74
2 changed files with 55 additions and 31 deletions

View File

@ -184,15 +184,23 @@ std::tuple<Tensor, Tensor, Tensor> _fake_quantize_learnable_per_tensor_affine_ba
0 & \text{ else }
\end{cases}
*/
float scale_val = scale[0].item<float>();
float inv_scale_val = 1.0f / scale_val;
int64_t zero_point_val = native::_get_zero_point_from_tensor(zero_point, quant_min, quant_max, false);
TORCH_CHECK(dY.scalar_type() == ScalarType::Float);
TORCH_CHECK(X.scalar_type() == ScalarType::Float);
TORCH_CHECK(scale.scalar_type() == ScalarType::Float);
TORCH_CHECK(zero_point.scalar_type() == ScalarType::Float);
TORCH_CHECK(X.numel() == dY.numel(), "`X` and `dY` are not the same size");
bool is_bfloat16 = (X.scalar_type() == at::kBFloat16);
at::Tensor X_ = is_bfloat16 ? X.to(ScalarType::Float) : X;
at::Tensor dY_ = is_bfloat16 ? dY.to(ScalarType::Float) : dY;
at::Tensor scale_ = is_bfloat16 ? scale.to(ScalarType::Float) : scale;
at::Tensor zero_point_ = is_bfloat16 ? zero_point.to(ScalarType::Float) : zero_point;
float scale_val = scale_[0].item<float>();
float inv_scale_val = 1.0f / scale_val;
int64_t zero_point_val = native::_get_zero_point_from_tensor(zero_point_, quant_min, quant_max, false);
TORCH_CHECK(dY_.scalar_type() == ScalarType::Float);
TORCH_CHECK(X_.scalar_type() == ScalarType::Float);
TORCH_CHECK(scale_.scalar_type() == ScalarType::Float);
TORCH_CHECK(zero_point_.scalar_type() == ScalarType::Float);
TORCH_CHECK(X_.numel() == dY_.numel(), "`X` and `dY` are not the same size");
TORCH_CHECK(
quant_min <= 0 && quant_max >= 0,
"`quant_min` should be less than or \
@ -200,28 +208,28 @@ std::tuple<Tensor, Tensor, Tensor> _fake_quantize_learnable_per_tensor_affine_ba
TORCH_CHECK(
zero_point_val >= quant_min && zero_point_val <= quant_max,
"`zero_point` must be between `quant_min` and `quant_max`.");
if (X.numel() <= 0) {
if (X_.numel() <= 0) {
return std::make_tuple(X, scale, zero_point);
}
auto dX = at::empty_like(X, X.options(), MemoryFormat::Preserve);
auto dScale_vec = at::empty_like(X, X.options(), MemoryFormat::Preserve);
auto dZeroPoint_vec = at::empty_like(X, X.options(), MemoryFormat::Preserve);
auto dX = at::empty_like(X_, X_.options(), MemoryFormat::Preserve);
auto dScale_vec = at::empty_like(X_, X_.options(), MemoryFormat::Preserve);
auto dZeroPoint_vec = at::empty_like(X_, X_.options(), MemoryFormat::Preserve);
auto iter = TensorIteratorConfig()
.add_output(dX)
.add_output(dScale_vec)
.add_output(dZeroPoint_vec)
.add_input(X)
.add_input(dY)
.add_input(X_)
.add_input(dY_)
.build();
fake_quant_grad_learnable_tensor_stub(
X.device().type(), iter, scale_val, inv_scale_val, zero_point_val, quant_min, quant_max, grad_factor);
X_.device().type(), iter, scale_val, inv_scale_val, zero_point_val, quant_min, quant_max, grad_factor);
// The total sums over the scale and zero point gradient vectors are what will be returned in the end.
auto dScale = dScale_vec.sum().unsqueeze(0).to(scale.device());
auto dZeroPoint = dZeroPoint_vec.sum().unsqueeze(0).to(zero_point.device());
auto dScale = dScale_vec.sum().unsqueeze(0).to(scale_.device());
auto dZeroPoint = dZeroPoint_vec.sum().unsqueeze(0).to(zero_point_.device());
return std::make_tuple(dX, dScale, dZeroPoint);
}