[AOTI][CPU] Consider bias=None case for fbgemm_linear_fp16_weight (#158535)

Test Plan:

Rollback Plan:

Differential Revision: D78458214

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158535
Approved by: https://github.com/houseroad, https://github.com/henryoier, https://github.com/jingsh
Author: Huamin Li
Date: 2025-07-21 23:42:40 +00:00
Committed by: PyTorch MergeBot
Parent: 08540b13c6
Commit: 2c37acfd89
10 changed files with 53 additions and 17 deletions
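
The change covers the bias=None call path of the wrapped fbgemm fp16 ops. Below is a minimal sketch of that path, mirroring the test added in this PR (assumes a CPU build of PyTorch with FBGEMM enabled; the helper name and tensor shapes are illustrative, not part of the PR):

    import torch

    def linear_fp16_no_bias(x, w):
        # Pack the fp16 weight, then run the dynamic linear with bias=None,
        # which is the case this commit adds handling for.
        packed_w = torch.ops._quantized.wrapped_fbgemm_pack_gemm_matrix_fp16(w)
        return torch.ops._quantized.wrapped_fbgemm_linear_fp16_weight(x, packed_w, None, w.size(0))

    x = torch.randn(2, 4)   # (batch_size, input_channels)
    w = torch.randn(7, 4)   # (output_channels, input_channels)
    eager_out = linear_fp16_no_bias(x, w)
    compiled_out = torch.compile(linear_fp16_no_bias)(x, w)
    torch.testing.assert_close(eager_out, compiled_out)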


@@ -3550,14 +3550,15 @@ class TestDynamicQuantizedOps(TestCase):
             (2, 4),  # batch_size
             (4, 5),  # input_channels
             (4, 7),  # output_channels
+            (True, False),  # bias None or not
         )
-        for batch_size, input_channels, output_channels in options:
+        for batch_size, input_channels, output_channels, bias_is_none in options:
             pack_op = torch.ops._quantized.wrapped_fbgemm_pack_gemm_matrix_fp16
             linear_op = torch.ops._quantized.wrapped_fbgemm_linear_fp16_weight

             x = torch.randn(batch_size, input_channels)
             w = torch.randn(output_channels, input_channels)
-            bias = torch.randn(output_channels)
+            bias = torch.randn(output_channels) if not bias_is_none else None

             w_packed = pack_op(w)
             out = linear_op(x, w_packed, bias, output_channels)
@@ -3591,6 +3592,18 @@ class TestDynamicQuantizedOps(TestCase):
         self.assertEqual(ref_out, compiled_out)

+        def func(X, W):
+            packed_W = torch.ops._quantized.wrapped_fbgemm_pack_gemm_matrix_fp16(W)
+            return torch.ops._quantized.wrapped_fbgemm_linear_fp16_weight(X, packed_W, None, W.size(0))
+
+        ref_out = func(x, w)
+        compiled = torch.compile(func)
+        compiled_out = compiled(x, w)
+        self.assertEqual(ref_out, compiled_out)
+
     """Tests the correctness of the dynamic quantized lstm/gru."""
     def _get_rnn_inputs(self, seq_len, num_batches, input_size, hidden_size, num_directions, reduce_range):
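
Since the fix targets the AOTI (AOT Inductor) wrapper codegen on CPU, the same bias=None module can also be exercised through the export and AOTI packaging entry points. A rough sketch, assuming the `torch.export.export`, `torch._inductor.aoti_compile_and_package`, and `torch._inductor.aoti_load_package` APIs of recent PyTorch releases and an FBGEMM-enabled CPU build; the module name and shapes are illustrative and not part of this PR:

    import torch
    import torch._inductor

    class Fp16LinearNoBias(torch.nn.Module):
        # Illustrative module: packs an fp16 weight and runs the dynamic
        # fbgemm linear with bias=None (the case this commit handles in AOTI).
        def __init__(self, w):
            super().__init__()
            self.register_buffer("w", w)

        def forward(self, x):
            packed_w = torch.ops._quantized.wrapped_fbgemm_pack_gemm_matrix_fp16(self.w)
            return torch.ops._quantized.wrapped_fbgemm_linear_fp16_weight(
                x, packed_w, None, self.w.size(0)
            )

    mod = Fp16LinearNoBias(torch.randn(7, 4))
    x = torch.randn(2, 4)
    ep = torch.export.export(mod, (x,))
    pkg_path = torch._inductor.aoti_compile_and_package(ep)  # builds a .pt2 package
    runner = torch._inductor.aoti_load_package(pkg_path)
    torch.testing.assert_close(mod(x), runner(x))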