[quant][fx] Make scale, zero_point buffers in the model, use FQN (for quantize_per_tensor ops) (#51171)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/51171

Following up on the previous PR, this PR registers the scale and zero_point values used by
quantize_per_tensor ops as buffers in the module, referenced by fully qualified name (FQN).
The dtype is still stored as a plain attribute (not registered as a buffer), since only tensor types can be registered as buffers.
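
A minimal sketch of the registration pattern (the helper name and prefix scheme below are illustrative, not the exact code from this PR):

import torch

# Hedged sketch of the buffer-registration pattern; register_qparams is an
# illustrative helper, not the actual function in torch/quantization/fx.
def register_qparams(module: torch.nn.Module, prefix: str,
                     scale: float, zero_point: int, dtype: torch.dtype):
    # Tensors can be buffers: they appear in state_dict() and follow
    # module.to(device=...) along with the parameters.
    module.register_buffer(prefix + "_scale", torch.tensor([scale]))
    module.register_buffer(prefix + "_zero_point",
                           torch.tensor([zero_point], dtype=torch.long))
    # register_buffer() only accepts tensors, so dtype is stored as a
    # plain attribute instead.
    setattr(module, prefix + "_dtype", dtype)

Once registered this way, the quantized graph can reference scale and zero_point through get_attr nodes by their fully qualified names instead of baking them in as constants.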

Test Plan:
python test/test_quantization.py test_qparams_buffers

Imported from OSS

Reviewed By: jerryzh168

Differential Revision: D26092964

fbshipit-source-id: a54d914db7863402f2b5a3ba2c8ce8b27c18b47b
Author: Supriya Rao
Date: 2021-01-28 08:29:57 -08:00
Committed by: Facebook GitHub Bot
Parent: 4c3f59b70e
Commit: 288b94a8ee
4 changed files with 57 additions and 36 deletions


@@ -298,9 +298,10 @@ class ConvRelu(QuantizeHandler):
         if activation_statically_quantized:
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.conv_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.conv_node
             return quantize_node(
-                root_module, quantizer.quantized_graph, op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer, op_out, quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node, is_input=False)
         else:
             # output for dynamically quantized conv op is not quantized
             return op_out
@@ -437,11 +438,13 @@ class LinearReLUQuantizeHandler(QuantizeHandler):
             # quantize output for statically quantized linear op
             root_module = quantizer.modules['']
             act_post_process_name = self.relu_node.name if self.relu_node else self.linear_node.name
+            act_post_process_node = self.relu_node if self.relu_node else self.linear_node
             return quantize_node(
-                root_module,
-                quantizer.quantized_graph,
+                quantizer,
                 op_out,
-                quantizer.activation_post_process_map[act_post_process_name])
+                quantizer.activation_post_process_map[act_post_process_name],
+                act_post_process_node,
+                is_input=False)
         else:
             # output for dynamically quantized linear op is not quantized
             return op_out
@@ -792,9 +795,8 @@ class DefaultQuantizeHandler(QuantizeHandler):
         assert self.all_node_args
         root_module = quantizer.modules['']
         return quantize_node(
-            root_module,
-            quantizer.quantized_graph,
-            node, quantizer.activation_post_process_map[node.name])
+            quantizer,
+            node, quantizer.activation_post_process_map[node.name], node, is_input=False)
 
 class CustomModuleQuantizeHandler(QuantizeHandler):
     def convert(self, quantizer: QuantizerCls, node: Node, load_arg: Callable,
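
For readers skimming the diff, a hedged end-to-end sketch of the resulting pattern: the qparams live as FQN-addressable buffers on the root module and feed quantize_per_tensor at runtime. Module and attribute names below are illustrative, and the eager-mode .item() calls stand in for the get_attr nodes the traced graph would actually use:

import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # qparams as buffers: serialized with state_dict() and moved by .to()
        self.register_buffer("conv_scale_0", torch.tensor([0.5]))
        self.register_buffer("conv_zero_point_0",
                             torch.tensor([0], dtype=torch.long))
        self.conv_dtype_0 = torch.quint8  # dtype remains a plain attribute

    def forward(self, x):
        # graph-mode quantization would emit get_attr nodes for these FQNs;
        # .item() keeps this eager sketch simple
        return torch.quantize_per_tensor(
            x, self.conv_scale_0.item(),
            int(self.conv_zero_point_0.item()), self.conv_dtype_0)

m = M()
print([name for name, _ in m.named_buffers()])
# ['conv_scale_0', 'conv_zero_point_0']
print(m(torch.randn(2, 2)).dtype)  # torch.quint8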