Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/52377

Add QNNPACK-specific packed params for sparse linear.
Add a sparse linear dynamic op with the appropriate registration.
Add a Python-side LinearDynamic module for sparsity.
Add tests to validate the sparse linear QNNPACK kernels.

Note that since these tests are mostly run on x86 platforms, and given that the 1x4 sparse kernels are implemented in both SSE and ARM, LinearDynamic currently defaults to the 1x4 pattern. The plan is to add another diff that allows a global override for the 8x1 pattern so that the prepare/convert flow can be used to export models for mobile.

Test Plan: buck run caffe2/torch/fb/model_optimization:sparsity_test

Reviewed By: z-a-f

Differential Revision: D26491944

fbshipit-source-id: b98839b4c62664e1fabbb0cbeb2e5c1bd5903b4d
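As a rough sketch of the flow this change targets (a minimal illustration using the existing dense dynamic-quantization API; the sparse QNNPACK packed params and the LinearDynamic module added by this diff are not spelled out here):

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(128, 64))
    # Dynamic quantization: weights are quantized ahead of time, activations
    # are quantized on the fly at run time.
    qmodel = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8)
    out = qmodel(torch.randn(1, 128))

The sparse LinearDynamic module described above is intended to slot into the same kind of prepare/convert-style flow, with the weight packed in the QNNPACK sparse (1x4 block) format.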
168 lines
5.9 KiB
Python
r"""Importing this file includes common utility methods for checking quantized
|
|
tensors and modules.
|
|
"""
|
|
import numpy as np
|
|
import torch
|
|
from contextlib import contextmanager
|
|
from torch.testing._internal.common_utils import TEST_WITH_ASAN, TEST_WITH_TSAN, TEST_WITH_UBSAN, IS_PPC, IS_MACOS, IS_WINDOWS
|
|
|
|
supported_qengines = torch.backends.quantized.supported_engines
|
|
supported_qengines.remove('none')
|
|
# Note: We currently do not run QNNPACK tests on WINDOWS and MACOS as it is flaky. Issue #29326
|
|
# QNNPACK is not supported on PPC
|
|
# QNNPACK throws ASAN heap-buffer-overflow error.
|
|
if 'qnnpack' in supported_qengines and any([IS_PPC, TEST_WITH_ASAN, TEST_WITH_TSAN, TEST_WITH_UBSAN, IS_MACOS, IS_WINDOWS]):
|
|
supported_qengines.remove('qnnpack')
|
|
|
|
def _conv_output_shape(input_size, kernel_size, padding, stride, dilation,
                       output_padding=0):
    """Computes the output shape given convolution parameters."""
    return np.floor((input_size + 2 * padding - kernel_size - (kernel_size - 1)
                     * (dilation - 1)) / stride) + 2 * output_padding + 1

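# A quick sanity check for _conv_output_shape (values are illustrative):
# with input_size=32, kernel_size=3, padding=1, stride=1, dilation=1 the
# spatial size is preserved, and stride=2 halves it:
#   _conv_output_shape(32, 3, 1, 1, 1)  ->  32.0
#   _conv_output_shape(32, 3, 1, 2, 1)  ->  16.0
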
# Quantization references
def _quantize(x, scale, zero_point, qmin=None, qmax=None, dtype=np.uint8):
    """Quantizes a numpy array."""
    if qmin is None:
        qmin = np.iinfo(dtype).min
    if qmax is None:
        qmax = np.iinfo(dtype).max
    qx = np.round(x / scale + zero_point).astype(np.int64)
    qx = np.clip(qx, qmin, qmax)
    qx = qx.astype(dtype)
    return qx

def _dequantize(qx, scale, zero_point):
    """Dequantizes a numpy array."""
    x = (qx.astype(float) - zero_point) * scale
    return x

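# Illustrative round trip through the reference quantizer (scale=0.5,
# zero_point=0, default uint8 range):
#   q = _quantize(np.array([0.0, 0.49, 1.0]), 0.5, 0)  ->  array([0, 1, 2], dtype=uint8)
#   _dequantize(q, 0.5, 0)                             ->  array([0. , 0.5, 1. ])
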
def _requantize(x, multiplier, zero_point, qmin=0, qmax=255, qtype=np.uint8):
    """Requantizes a numpy array, i.e., intermediate int32 or int16 values are
    converted back to the given type."""
    qx = (x * multiplier).round() + zero_point
    qx = np.clip(qx, qmin, qmax).astype(qtype)
    return qx

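# Illustrative use of _requantize: scaling int32 accumulator values by an
# output multiplier of 0.05 and shifting by an output zero point of 10:
#   _requantize(np.array([1000, -100]), 0.05, 10)  ->  array([60, 5], dtype=uint8)
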
def _calculate_dynamic_qparams(X, dtype, reduce_range=False):
    """Calculate the dynamic quantization parameters (scale, zero_point)
    according to the min and max element of the tensor"""
    if isinstance(X, torch.Tensor):
        X = X.numpy()
    if dtype == torch.qint8:
        if reduce_range:
            qmin, qmax = -64, 63
        else:
            qmin, qmax = -128, 127
    else:  # dtype == torch.quint8
        if reduce_range:
            qmin, qmax = 0, 127
        else:
            qmin, qmax = 0, 255

    min_val = X.min()
    max_val = X.max()
    if min_val == max_val:
        scale = 1.0
        zero_point = 0
    else:
        max_val = max(max_val, 0.0)
        min_val = min(min_val, 0.0)
        scale = (max_val - min_val) / (qmax - qmin)
        scale = max(scale, np.finfo(np.float32).eps)
        zero_point = qmin - round(min_val / scale)
        zero_point = max(qmin, zero_point)
        zero_point = min(qmax, zero_point)
    return [float(scale), int(zero_point)]

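# Illustrative example: a tensor spanning [0.0, 2.0] quantized to quint8 uses
# the full [0, 255] range, giving scale = 2 / 255 (~0.00784) and zero_point = 0:
#   _calculate_dynamic_qparams(torch.tensor([0.0, 1.0, 2.0]), torch.quint8)
#   ->  [0.00784..., 0]
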
def _calculate_dynamic_per_channel_qparams(X, dtype):
    """Calculate the dynamic quantization parameters (scale, zero_point)
    according to the min and max element of the tensor"""
    if isinstance(X, torch.Tensor):
        X = X.numpy()
    qmin, qmax = torch.iinfo(dtype).min, torch.iinfo(dtype).max
    n_levels = qmax - qmin
    scale = np.zeros(X.shape[0], dtype=np.float64)
    zero_point = np.zeros(X.shape[0], dtype=np.int64)
    for i in range(zero_point.shape[0]):
        # Note: the min/max below are taken over the whole tensor, so every
        # channel currently ends up with the same (scale, zero_point) pair.
        min_val = X.min()
        max_val = X.max()
        if min_val == max_val:
            scale[i] = 1.0
            zero_point[i] = 0
        else:
            max_val = max(max_val, 0.0)
            min_val = min(min_val, 0.0)
            scale[i] = (max_val - min_val) / n_levels
            scale[i] = max(scale[i], np.finfo(np.float32).eps)
            zero_point[i] = qmin - round(min_val / scale[i])
            zero_point[i] = max(qmin, zero_point[i])
            zero_point[i] = min(qmax, zero_point[i])

    return scale, zero_point

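# Illustrative example: for a weight matrix W of shape (out_features,
# in_features) and an integer dtype such as torch.int8, this returns arrays
# with one (scale, zero_point) entry per output channel:
#   scales, zero_points = _calculate_dynamic_per_channel_qparams(W, torch.int8)
#   scales.shape == zero_points.shape == (W.shape[0],)
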
def _snr(x, x_hat):
    """Calculates the signal to noise ratio and returns the signal and noise
    power, as well as the SNR in dB.
    If the input is a list/tuple this function is called recursively on each
    element. The result will have the same nested structure as the inputs.

    Args:
        x, x_hat: Either a tensor or a nested list/tuple of tensors.
    Returns:
        signal, noise, SNR(in dB): Either floats or a nested list of floats
    """
    if isinstance(x, (list, tuple)):
        assert len(x) == len(x_hat)
        res = []
        for idx in range(len(x)):
            res.append(_snr(x[idx], x_hat[idx]))
        return res
    if x_hat.is_quantized:
        x_hat = x_hat.dequantize()
    if x.is_quantized:
        x = x.dequantize()
    noise = (x - x_hat).norm()
    if noise == 0:
        return 0.0, float('inf'), float('inf')
    signal = x.norm()
    snr = signal / noise
    snr_db = 20 * snr.log10()
    return signal, noise, snr_db

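# Illustrative use of _snr: compare a float tensor against its quantized
# counterpart; a higher snr_db means less quantization error:
#   x = torch.randn(100)
#   qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.qint8)
#   signal, noise, snr_db = _snr(x, qx)
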
@contextmanager
def override_quantized_engine(qengine):
    # Temporarily switch the quantized backend, restoring the previous engine
    # on exit even if the body raises.
    previous = torch.backends.quantized.engine
    torch.backends.quantized.engine = qengine
    try:
        yield
    finally:
        torch.backends.quantized.engine = previous

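# Illustrative use of override_quantized_engine, guarded by the availability
# check at the top of this file:
#   if 'qnnpack' in supported_qengines:
#       with override_quantized_engine('qnnpack'):
#           ...  # build and run quantized modules against QNNPACK
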
@contextmanager
def override_cpu_allocator_for_qnnpack(qengine_is_qnnpack):
    # When the active engine is QNNPACK, switch to the default mobile CPU
    # allocator (as used on mobile builds) for the duration of the block.
    try:
        if qengine_is_qnnpack:
            torch._C._set_default_mobile_cpu_allocator()
        yield
    finally:
        if qengine_is_qnnpack:
            torch._C._unset_default_mobile_cpu_allocator()

# TODO: Update all quantization tests to use this decorator.
# Currently for some of the tests it seems to have inconsistent params
# for fbgemm vs qnnpack.
def override_qengines(qfunction):
    def test_fn(*args, **kwargs):
        for qengine in supported_qengines:
            with override_quantized_engine(qengine):
                # qfunction should not return anything.
                qfunction(*args, **kwargs)
    return test_fn


def qengine_is_fbgemm():
    return torch.backends.quantized.engine == 'fbgemm'


def qengine_is_qnnpack():
    return torch.backends.quantized.engine == 'qnnpack'
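# Illustrative use of override_qengines together with the qengine_is_* helpers
# (TestCase here stands in for the usual common_utils test base class):
#
#   class TestQuantizedOps(TestCase):
#       @override_qengines
#       def test_some_quantized_op(self):
#           if qengine_is_qnnpack():
#               ...  # QNNPACK-specific expectations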