Change Bias to QTensor with qint32(int32_t) (#20713)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/20713
As Title says.
Reviewed By: zafartahirov
Differential Revision: D15410734
fbshipit-source-id: c00f409278736cf9e3205f7d36dda1b96120f47d
committed by Facebook Github Bot
parent b9a150ede0
commit e6f22e1b89
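For context, this change quantizes the fully-connected bias as a qint32 QTensor with scale = X_scale * W_scale and zero_point = 0 (see the test diff below), instead of passing a raw int32 tensor. Below is a minimal NumPy sketch of that convention, assuming the standard affine mapping q = round(x / scale) + zero_point, which is what the test's _dequantize helper inverts; the concrete scale and bias values are illustrative only.

import numpy as np

X_scale, W_scale = 0.1, 0.05          # example activation / weight scales (illustrative)
bias_fp32 = np.array([1.25, -0.7, 0.0], dtype=np.float32)

bias_scale = X_scale * W_scale        # qint32 bias shares the int32 accumulator's scale
bias_q = np.round(bias_fp32 / bias_scale).astype(np.int32)   # zero_point = 0
bias_dq = bias_q.astype(np.float32) * bias_scale             # dequantize: q * scale

print(bias_q)    # [ 250 -140    0]
print(bias_dq)   # close to bias_fp32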
@@ -95,6 +95,8 @@ class QFCInt8 final : public c10::OperatorKernel {
   // TODO: contiguous is called for further jit optimizations.
   auto bias_contig = bias.contiguous();
+  const auto* bias_ptr =
+      reinterpret_cast<int32_t*>(bias_contig.data<c10::qint32>());

   // After the uint8 * int8 matrix multiplication is performed, this operation
   // does:
@@ -108,7 +110,7 @@ class QFCInt8 final : public c10::OperatorKernel {
       /*Bq_zero_point=*/&weight_zero_point_int32,
       /*row_offsets=*/packA.getRowOffsetBuffer(),
       /*col_offsets=*/col_offsets.data(),
-      /*bias=*/bias_contig.data<int32_t>(),
+      /*bias=*/bias_ptr,
       /*nCol=*/N);

   // Allocate output Tensor and a buffer for fbgemmPacked to use
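The kernel now reinterprets the qint32 bias storage as int32_t and hands it to fbgemm, which adds it to the int32 accumulator of the uint8 * int8 matmul before requantizing to uint8. A rough sketch of that post-GEMM step follows; it is not the fbgemm implementation, the function name and values are illustrative, and it assumes the bias was quantized with scale X_scale * W_scale and zero_point 0 as in the test diff below.

import numpy as np

def requantize_row(acc_int32, bias_q_int32, X_scale, W_scale, Y_scale, Y_zp):
    # acc_int32: int32 accumulators of sum((X_q - X_zp) * (W_q - W_zp)) per output channel
    # bias_q_int32: bias already in the accumulator's scale (X_scale * W_scale, zero_point 0),
    # so it can be added without any rescaling
    acc = acc_int32.astype(np.int64) + bias_q_int32
    y = np.rint(acc * (X_scale * W_scale / Y_scale)) + Y_zp
    return np.clip(y, 0, 255).astype(np.uint8)   # quint8 output

# illustrative values only
print(requantize_row(np.array([4000, -1200], dtype=np.int32),
                     np.array([250, -140], dtype=np.int32),
                     X_scale=0.1, W_scale=0.05, Y_scale=0.3, Y_zp=5))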
@@ -227,6 +227,12 @@ class TestQuantizedFC(unittest.TestCase):
             + W_value_min
         ).astype(np.int8)

+        b_value_min = -10
+        b_value_max = 10
+        b_q0 = np.round(
+            np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
+        ).astype(np.int32)
+
         avoid_vpmaddubsw_overflow_fc(
             batch_size,
             input_channels,
@@ -241,10 +247,11 @@ class TestQuantizedFC(unittest.TestCase):

         X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
         W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
+        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

         X_q = X.quantize_linear(scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
         W_q = W.quantize_linear(scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
-        b_q = torch.round(torch.rand(output_channels) * 10 - 10).to(dtype=torch.int32)
+        b_q = b.quantize_linear(scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

         # Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
         # Y_scale * 255 (max for uint8).
@@ -252,7 +259,7 @@ class TestQuantizedFC(unittest.TestCase):
         Y_zp = 5

         # Reference quantized FC operator
-        Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q.numpy(), Y_scale, Y_zp)
+        Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)

         # Weight prepacking operator for quantized FC
         W_prepack = qfc_prepack(W_q)
@@ -268,7 +275,7 @@ class TestQuantizedFC(unittest.TestCase):
         # Reference quantized result from PyTorch Linear operator
         W_fp32 = W_q.dequantize().to(dtype=torch.float)
         X_fp32 = X_q.dequantize().to(dtype=torch.float)
-        b_fp32 = torch.from_numpy(_dequantize(b_q.numpy(), W_scale * X_scale, 0).astype(np.float)).to(dtype=torch.float)
+        b_fp32 = b_q.dequantize().to(dtype=torch.float)
         Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
         Y_q_ref2 = Y_fp32_ref.quantize_linear(Y_scale, Y_zp, torch.quint8)

@@ -304,6 +311,12 @@ class TestQuantizedFC(unittest.TestCase):
             + W_value_min
         ).astype(np.int8)

+        b_value_min = -10
+        b_value_max = 10
+        b_q0 = np.round(
+            np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
+        ).astype(np.int32)
+
         avoid_vpmaddubsw_overflow_fc(
             batch_size,
             input_channels,
@@ -318,10 +331,11 @@ class TestQuantizedFC(unittest.TestCase):

         X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
         W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
+        b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

         X_q = X.quantize_linear(scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
         W_q = W.quantize_linear(scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
-        b_q = torch.round(torch.rand(output_channels) * 10 - 10).to(dtype=torch.int32)
+        b_q = b.quantize_linear(scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

         # Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
         # Y_scale * 255 (max for uint8).
@@ -329,7 +343,7 @@ class TestQuantizedFC(unittest.TestCase):
         Y_zp = 5

         # Reference quantized FC operator
-        Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q.numpy(), Y_scale, Y_zp)
+        Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
         Y_q_ref[Y_q_ref < Y_zp] = Y_zp

         # Weight prepacking operator for quantized FC
@@ -346,7 +360,7 @@ class TestQuantizedFC(unittest.TestCase):
         # Reference quantized result from PyTorch Linear operator
         W_fp32 = W_q.dequantize().to(dtype=torch.float)
         X_fp32 = X_q.dequantize().to(dtype=torch.float)
-        b_fp32 = torch.from_numpy(_dequantize(b_q.numpy(), W_scale * X_scale, 0).astype(np.float)).to(dtype=torch.float)
+        b_fp32 = b_q.dequantize().to(dtype=torch.float)
         Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
         Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
         Y_q_ref2 = Y_fp32_ref.quantize_linear(Y_scale, Y_zp, torch.quint8)
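The updated reference path (b_fp32 = b_q.dequantize()) should agree with the old NumPy path (_dequantize(b_q0, X_scale * W_scale, 0)), since both undo the same affine mapping with zero point 0. A small sketch under that assumption; _dequantize here is a stand-in for the test helper, whose exact definition is not part of this diff.

import numpy as np

def _dequantize(q, scale, zero_point):
    # assumed affine dequantization: x = scale * (q - zero_point)
    return (q.astype(np.float32) - zero_point) * scale

X_scale, W_scale = 0.1, 0.05
b_q0 = np.array([250, -140, 0], dtype=np.int32)

# with zero_point = 0 this is just b_q0 * (X_scale * W_scale),
# which is what a qint32 QTensor with that scale dequantizes to
print(_dequantize(b_q0, X_scale * W_scale, 0))  # [ 1.25 -0.7   0.  ]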