[quant][pt2e] Move batch norm op between eval/train for cuda (#123957)

Summary: Previously, `move_exported_model_to_train` and `move_exported_model_to_eval` only switched the CPU versions of the batch norm op. This commit adds support for the CUDA versions of the op too. Note that this fix is temporary; we won't have to differentiate between these two cases once we have batch norm consolidation. Test Plan: python test/test_quantization.py -k test_move_exported_model_bn Reviewers: jerryzh168 Subscribers: jerryzh168, leslie-fang-intel, supriyar Differential Revision: [D56070054](https://our.internmc.facebook.com/intern/diff/D56070054) Pull Request resolved: https://github.com/pytorch/pytorch/pull/123957 Approved by: https://github.com/jerryzh168
2025-10-20 21:14:14 +08:00 · 2024-04-23 14:13:01 -07:00
parent 64af899fdf
commit 4efb28c900
4 changed files with 81 additions and 45 deletions
--- a/test/quantization/pt2e/test_quantize_pt2e.py
+++ b/test/quantization/pt2e/test_quantize_pt2e.py
@ -1826,6 +1826,18 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
    def test_move_exported_model_dropout_inplace(self):
        self._test_move_exported_model_dropout(inplace=True)

+    def _get_bn_train_eval_ops(self, is_cuda: bool):
+        if is_cuda:
+            return (
+                torch.ops.aten.cudnn_batch_norm.default,
+                torch.ops.aten.cudnn_batch_norm.default,
+            )
+        else:
+            return (
+                torch.ops.aten._native_batch_norm_legit.default,
+                torch.ops.aten._native_batch_norm_legit_no_training.default,
+            )
+
    def test_move_exported_model_bn(self):
        """
        Test switching batch_norm behavior between train and eval modes using
@ -1840,12 +1852,18 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
            def forward(self, x):
                return self.bn(x)

-        example_inputs = (torch.randn(1, 3, 3, 3),)
-        m = M().train()
+        is_cuda = torch.cuda.is_available()
+        if is_cuda:
+            m = M().train().cuda()
+            example_inputs = (torch.randn(1, 3, 3, 3).cuda(),)
+        else:
+            m = M().train()
+            example_inputs = (torch.randn(1, 3, 3, 3),)
+        bn_train_op, bn_eval_op = self._get_bn_train_eval_ops(is_cuda)
        m = capture_pre_autograd_graph(m, example_inputs)

        # Assert that batch norm op exists and is in train mode
-        bn_node = self._get_node(m, torch.ops.aten._native_batch_norm_legit.default)
+        bn_node = self._get_node(m, bn_train_op)
        self.assertTrue(bn_node is not None)
        self.assertTrue(bn_node.args[5])

@ -1853,16 +1871,14 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
        torch.ao.quantization.move_exported_model_to_eval(m)

        # Assert that batch norm op is now in eval mode
-        bn_node = self._get_node(
-            m, torch.ops.aten._native_batch_norm_legit_no_training.default
-        )
+        bn_node = self._get_node(m, bn_eval_op)
        self.assertTrue(bn_node is not None)

        # Move to train
        torch.ao.quantization.move_exported_model_to_train(m)

        # Assert that batch norm op is now in train mode again
-        bn_node = self._get_node(m, torch.ops.aten._native_batch_norm_legit.default)
+        bn_node = self._get_node(m, bn_train_op)
        self.assertTrue(bn_node is not None)
        self.assertTrue(bn_node.args[5])

@ -1908,22 +1924,25 @@ class TestQuantizePT2E(PT2EQuantizationTestCase):
                x = self.dropout(x)
                return x

-        example_inputs = (torch.randn(1, 3, 3, 3),)
-        m = M().train()
+        is_cuda = torch.cuda.is_available()
+        if is_cuda:
+            m = M().train().cuda()
+            example_inputs = (torch.randn(1, 3, 3, 3).cuda(),)
+        else:
+            m = M().train()
+            example_inputs = (torch.randn(1, 3, 3, 3),)
+        bn_train_op, bn_eval_op = self._get_bn_train_eval_ops(is_cuda)
        m = capture_pre_autograd_graph(m, example_inputs)

        def _assert_ops_are_correct(m: torch.fx.GraphModule, train: bool):
            targets = [n.target for n in m.graph.nodes]
-            bn_train_target = torch.ops.aten._native_batch_norm_legit.default
-            bn_eval_target = torch.ops.aten._native_batch_norm_legit_no_training.default
-            if train:
-                self.assertTrue(bn_train_target in targets)
-                self.assertTrue(bn_eval_target not in targets)
-            else:
-                self.assertTrue(bn_eval_target in targets)
-                self.assertTrue(bn_train_target not in targets)
+            bn_op = bn_train_op if train else bn_eval_op
+            bn_node = self._get_node(m, bn_op)
+            self.assertTrue(bn_node is not None)
+            if is_cuda:
+                self.assertEqual(bn_node.args[5], train)
            dropout_node = self._get_node(m, torch.ops.aten.dropout.default)
-            self.assertTrue(dropout_node.args[2] == train)
+            self.assertEqual(dropout_node.args[2], train)

        # Before wrapping: this is not OK
        with self.assertRaises(NotImplementedError):