[quant][fx] Make scale, zero_point buffers in the model, use FQN (for quantize_per_tensor ops) (#51171)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/51171

Following up on the previous PR, this PR registers the scale and zero_point used by quantize_per_tensor ops as buffers in the module. The dtype is still stored as a plain attribute (not registered as a buffer), since only Tensor types can be registered as buffers.

Test Plan: python test/test_quantization.py test_qparams_buffers

Imported from OSS

Reviewed By: jerryzh168

Differential Revision: D26092964

fbshipit-source-id: a54d914db7863402f2b5a3ba2c8ce8b27c18b47b
Committed by: Facebook GitHub Bot
Parent: 4c3f59b70e
Commit: 288b94a8ee
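To make the idea concrete, here is a minimal sketch of what registering qparams as buffers looks like, assuming a hypothetical helper name and FQN-derived attribute suffixes (the real implementation lives in the FX quantization pass and may name things differently). scale and zero_point become Tensors registered via register_buffer, so they end up in the state_dict under fully qualified names, while dtype remains an ordinary attribute because only Tensors can be buffers.

```python
import torch

def attach_qparams_as_buffers(root_module, node_name, scale, zero_point, dtype):
    # Hypothetical helper, not the PR's code: register qparams under
    # FQN-derived names so they appear in the model's state_dict.
    prefix = node_name.replace('.', '_')
    root_module.register_buffer(prefix + '_scale', torch.tensor(float(scale)))
    root_module.register_buffer(prefix + '_zero_point',
                                torch.tensor(int(zero_point), dtype=torch.long))
    # dtype (e.g. torch.quint8) is not a Tensor, so it stays a plain attribute.
    setattr(root_module, prefix + '_dtype', dtype)
    return prefix + '_scale', prefix + '_zero_point'
```

The quantize_per_tensor call in the quantized graph can then read these values through get_attr nodes instead of inlining constant scalars.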
@@ -298,9 +298,10 @@ class ConvRelu(QuantizeHandler):
         if activation_statically_quantized:
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.conv_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.conv_node
             return quantize_node(
-                root_module, quantizer.quantized_graph, op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer, op_out, quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node, is_input=False)
         else:
             # output for dynamically quantized conv op is not quantized
             return op_out
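The call-site change above corresponds to a new quantize_node signature that receives the quantizer and the observer node instead of the root module and graph, so the helper can register the qparams as buffers and name them after the node. The outline below is an assumption about what such a helper does, shown for illustration only; the argument order matches the call sites in this diff, but everything inside the body (buffer name suffixes, exact node construction) is hypothetical.

```python
import torch

def quantize_node(quantizer, node, activation_post_process, obs_node, is_input):
    # Assumed outline, not the PR's implementation: fetch qparams from the
    # observer, register them as buffers on the root module, and emit a
    # quantize_per_tensor call that reads them through get_attr nodes.
    root_module = quantizer.modules['']
    graph = quantizer.quantized_graph
    scale, zero_point = activation_post_process.calculate_qparams()
    dtype = activation_post_process.dtype
    prefix = obs_node.name + ('_input' if is_input else '_output')
    root_module.register_buffer(prefix + '_scale', scale.detach())
    root_module.register_buffer(prefix + '_zero_point', zero_point.detach().to(torch.long))
    scale_node = graph.create_node('get_attr', prefix + '_scale')
    zero_point_node = graph.create_node('get_attr', prefix + '_zero_point')
    return graph.create_node(
        'call_function', torch.quantize_per_tensor,
        (node, scale_node, zero_point_node, dtype), {})
```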
@@ -437,11 +438,13 @@ class LinearReLUQuantizeHandler(QuantizeHandler):
             # quantize output for statically quantized linear op
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.linear_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.linear_node
             return quantize_node(
-                root_module,
-                quantizer.quantized_graph,
+                quantizer,
                 op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node,
+                is_input=False)
         else:
             # output for dynamically quantized linear op is not quantized
             return op_out
@@ -792,9 +795,8 @@ class DefaultQuantizeHandler(QuantizeHandler):
         assert self.all_node_args
         root_module = quantizer.modules['']
         return quantize_node(
-            root_module,
-            quantizer.quantized_graph,
-            node, quantizer.activation_post_process_map[node.name])
+            quantizer,
+            node, quantizer.activation_post_process_map[node.name], node, is_input=False)
 
 class CustomModuleQuantizeHandler(QuantizeHandler):
     def convert(self, quantizer: QuantizerCls, node: Node, load_arg: Callable,
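To see the effect end to end, the snippet below (a rough check, assuming the qconfig_dict-based prepare_fx/convert_fx API from around the time of this PR) quantizes a tiny model and lists its registered buffers; after this change the activation scale and zero_point show up there under FQN-based names. The exact buffer names are an implementation detail, so it only filters on substrings.

```python
import torch
from torch.quantization import get_default_qconfig
from torch.quantization.quantize_fx import prepare_fx, convert_fx

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)

model = M().eval()
qconfig_dict = {"": get_default_qconfig("fbgemm")}
prepared = prepare_fx(model, qconfig_dict)
prepared(torch.randn(2, 4))  # calibrate so the observers record qparams
quantized = convert_fx(prepared)

# scale / zero_point are now buffers, so they appear in named_buffers / state_dict
for name, buf in quantized.named_buffers():
    if "scale" in name or "zero_point" in name:
        print(name, buf)
```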