[quant][fx] Make scale, zero_point buffers in the model, use FQN (for quantize_per_tensor ops) (#51171)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/51171

Following up on the previous PR, this PR registers the scale and zero_point values used by
quantize_per_tensor ops as buffers in the module, referenced by fully qualified name (FQN).
The dtype is still stored as a plain attribute (not registered as a buffer), since only tensor types can be registered as buffers.
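
A minimal sketch of the registration pattern (the helper name and prefix scheme below are illustrative, not the exact code from this PR):

import torch

# Hedged sketch of the buffer-registration pattern; register_qparams is an
# illustrative helper, not the actual function in torch/quantization/fx.
def register_qparams(module: torch.nn.Module, prefix: str,
                     scale: float, zero_point: int, dtype: torch.dtype):
    # Tensors can be buffers: they appear in state_dict() and follow
    # module.to(device=...) along with the parameters.
    module.register_buffer(prefix + "_scale", torch.tensor([scale]))
    module.register_buffer(prefix + "_zero_point",
                           torch.tensor([zero_point], dtype=torch.long))
    # register_buffer() only accepts tensors, so dtype is stored as a
    # plain attribute instead.
    setattr(module, prefix + "_dtype", dtype)

Once registered this way, the quantized graph can reference scale and zero_point through get_attr nodes by their fully qualified names instead of baking them in as constants.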

Test Plan:
python test/test_quantization.py test_qparams_buffers

Imported from OSS

Reviewed By: jerryzh168

Differential Revision: D26092964

fbshipit-source-id: a54d914db7863402f2b5a3ba2c8ce8b27c18b47b
Author: Supriya Rao
Date: 2021-01-28 08:29:57 -08:00
Committed by: Facebook GitHub Bot
Parent: 4c3f59b70e
Commit: 288b94a8ee
4 changed files with 57 additions and 36 deletions


@@ -298,9 +298,10 @@ class ConvRelu(QuantizeHandler):
         if activation_statically_quantized:
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.conv_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.conv_node
             return quantize_node(
-                root_module, quantizer.quantized_graph, op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer, op_out, quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node, is_input=False)
         else:
             # output for dynamically quantized conv op is not quantized
             return op_out
@@ -437,11 +438,13 @@ class LinearReLUQuantizeHandler(QuantizeHandler):
             # quantize output for statically quantized linear op
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.linear_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.linear_node
             return quantize_node(
-                root_module,
-                quantizer.quantized_graph,
+                quantizer,
                 op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node,
+                is_input=False)
         else:
             # output for dynamically quantized linear op is not quantized
             return op_out
@@ -792,9 +795,8 @@ class DefaultQuantizeHandler(QuantizeHandler):
         assert self.all_node_args
         root_module = quantizer.modules['']
         return quantize_node(
-            root_module,
-            quantizer.quantized_graph,
-            node, quantizer.activation_post_process_map[node.name])
+            quantizer,
+            node, quantizer.activation_post_process_map[node.name], node, is_input=False)
 
 class CustomModuleQuantizeHandler(QuantizeHandler):
     def convert(self, quantizer: QuantizerCls, node: Node, load_arg: Callable,
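
For readers skimming the diff, a hedged end-to-end sketch of the resulting pattern: the qparams live as FQN-addressable buffers on the root module and feed quantize_per_tensor at runtime. Module and attribute names below are illustrative, and the eager-mode .item() calls stand in for the get_attr nodes the traced graph would actually use:

import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # qparams as buffers: serialized with state_dict() and moved by .to()
        self.register_buffer("conv_scale_0", torch.tensor([0.5]))
        self.register_buffer("conv_zero_point_0",
                             torch.tensor([0], dtype=torch.long))
        self.conv_dtype_0 = torch.quint8  # dtype remains a plain attribute

    def forward(self, x):
        # graph-mode quantization would emit get_attr nodes for these FQNs;
        # .item() keeps this eager sketch simple
        return torch.quantize_per_tensor(
            x, self.conv_scale_0.item(),
            int(self.conv_zero_point_0.item()), self.conv_dtype_0)

m = M()
print([name for name, _ in m.named_buffers()])
# ['conv_scale_0', 'conv_zero_point_0']
print(m(torch.randn(2, 2)).dtype)  # torch.quint8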