Change Bias to QTensor with qint32(int32_t) (#20713)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/20713

As the title says: the bias passed to the quantized FC operator is changed from a plain int32 tensor to a QTensor with dtype qint32 (backed by int32_t).

Reviewed By: zafartahirov

Differential Revision: D15410734

fbshipit-source-id: c00f409278736cf9e3205f7d36dda1b96120f47d
Author: Jianyu Huang
Date: 2019-05-21 12:32:59 -07:00
Committed by: Facebook GitHub Bot
Parent: b9a150ede0
Commit: e6f22e1b89

2 changed files with 23 additions and 7 deletions
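In short: the tests below stop feeding the quantized FC operator a raw int32 bias and instead quantize a float bias into a qint32 QTensor with scale X_scale * W_scale and zero point 0. The following is a minimal before/after sketch of that bias construction, not code from the PR; it uses the quantize_linear Tensor API that appears in the test diff (an API of this era of PyTorch, later renamed), with example values standing in for the test's output_channels, X_scale, and W_scale.

    import torch

    # Example values standing in for the test's parameters (assumed, not from the PR).
    output_channels = 4
    X_scale, W_scale = 1.5, 0.4

    # Before this change: bias handed to the op as a plain int32 tensor.
    b_q_old = torch.round(torch.rand(output_channels) * 10 - 10).to(dtype=torch.int32)

    # After this change: a float bias is quantized into a qint32 QTensor whose
    # scale is X_scale * W_scale and whose zero point is 0, i.e. the product of
    # the input and weight quantization parameters.
    b = torch.rand(output_channels) * 20 - 10
    b_q_new = b.quantize_linear(scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)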


@@ -95,6 +95,8 @@ class QFCInt8 final : public c10::OperatorKernel {
// TODO: contiguous is called for further jit optimizations.
auto bias_contig = bias.contiguous();
+const auto* bias_ptr =
+    reinterpret_cast<int32_t*>(bias_contig.data<c10::qint32>());
// After the uint8 * int8 matrix multiplication is performed, this operation
// does:
@@ -108,7 +110,7 @@ class QFCInt8 final : public c10::OperatorKernel {
/*Bq_zero_point=*/&weight_zero_point_int32,
/*row_offsets=*/packA.getRowOffsetBuffer(),
/*col_offsets=*/col_offsets.data(),
-/*bias=*/bias_contig.data<int32_t>(),
+/*bias=*/bias_ptr,
/*nCol=*/N);
// Allocate output Tensor and a buffer for fbgemmPacked to use


@@ -227,6 +227,12 @@ class TestQuantizedFC(unittest.TestCase):
+ W_value_min
).astype(np.int8)
+b_value_min = -10
+b_value_max = 10
+b_q0 = np.round(
+    np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
+).astype(np.int32)
avoid_vpmaddubsw_overflow_fc(
batch_size,
input_channels,
@@ -241,10 +247,11 @@ class TestQuantizedFC(unittest.TestCase):
X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
+b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)
X_q = X.quantize_linear(scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
W_q = W.quantize_linear(scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
-b_q = torch.round(torch.rand(output_channels) * 10 - 10).to(dtype=torch.int32)
+b_q = b.quantize_linear(scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)
# Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
# Y_scale * 255 (max for uint8).
@@ -252,7 +259,7 @@ class TestQuantizedFC(unittest.TestCase):
Y_zp = 5
# Reference quantized FC operator
-Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q.numpy(), Y_scale, Y_zp)
+Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
# Weight prepacking operator for quantized FC
W_prepack = qfc_prepack(W_q)
@@ -268,7 +275,7 @@ class TestQuantizedFC(unittest.TestCase):
# Reference quantized result from PyTorch Linear operator
W_fp32 = W_q.dequantize().to(dtype=torch.float)
X_fp32 = X_q.dequantize().to(dtype=torch.float)
-b_fp32 = torch.from_numpy(_dequantize(b_q.numpy(), W_scale * X_scale, 0).astype(np.float)).to(dtype=torch.float)
+b_fp32 = b_q.dequantize().to(dtype=torch.float)
Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
Y_q_ref2 = Y_fp32_ref.quantize_linear(Y_scale, Y_zp, torch.quint8)
@@ -304,6 +311,12 @@ class TestQuantizedFC(unittest.TestCase):
+ W_value_min
).astype(np.int8)
+b_value_min = -10
+b_value_max = 10
+b_q0 = np.round(
+    np.random.rand(output_channels) * (b_value_max - b_value_min) + b_value_min
+).astype(np.int32)
avoid_vpmaddubsw_overflow_fc(
batch_size,
input_channels,
@@ -318,10 +331,11 @@ class TestQuantizedFC(unittest.TestCase):
X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
+b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)
X_q = X.quantize_linear(scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
W_q = W.quantize_linear(scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
-b_q = torch.round(torch.rand(output_channels) * 10 - 10).to(dtype=torch.int32)
+b_q = b.quantize_linear(scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)
# Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
# Y_scale * 255 (max for uint8).
@@ -329,7 +343,7 @@ class TestQuantizedFC(unittest.TestCase):
Y_zp = 5
# Reference quantized FC operator
-Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q.numpy(), Y_scale, Y_zp)
+Y_q_ref = qfc_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
Y_q_ref[Y_q_ref < Y_zp] = Y_zp
# Weight prepacking operator for quantized FC
@@ -346,7 +360,7 @@ class TestQuantizedFC(unittest.TestCase):
# Reference quantized result from PyTorch Linear operator
W_fp32 = W_q.dequantize().to(dtype=torch.float)
X_fp32 = X_q.dequantize().to(dtype=torch.float)
-b_fp32 = torch.from_numpy(_dequantize(b_q.numpy(), W_scale * X_scale, 0).astype(np.float)).to(dtype=torch.float)
+b_fp32 = b_q.dequantize().to(dtype=torch.float)
Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
Y_q_ref2 = Y_fp32_ref.quantize_linear(Y_scale, Y_zp, torch.quint8)
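One consequence visible in both tests above: since b_q is now a QTensor, the float reference bias comes from b_q.dequantize() rather than a manual _dequantize over the raw int32 values. Below is a small sketch, not part of the PR, of why the two routes agree under the per-tensor affine scheme used in the tests (scale = X_scale * W_scale, zero point 0); the concrete numbers are made-up examples.

    import numpy as np
    import torch

    # Assumed example values; in the tests these come from X_scale, W_scale, and b_q0.
    X_scale, W_scale = 1.5, 0.4
    scale = X_scale * W_scale
    b_q0 = np.array([-7, 3, 0, 9], dtype=np.int32)

    # Old reference path: dequantize the raw int32 bias by hand (zero point 0).
    b_fp32_manual = torch.from_numpy((b_q0 * scale).astype(np.float32))

    # New reference path: build the qint32 QTensor once, then call dequantize().
    b = torch.from_numpy((b_q0 * scale).astype(np.float32))
    b_q = b.quantize_linear(scale=scale, zero_point=0, dtype=torch.qint32)
    b_fp32_qtensor = b_q.dequantize()

    # The two float biases agree up to the rounding done inside quantize_linear.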