Conv BN folding data type issue when conv has no bias (#78241)
PR https://github.com/pytorch/pytorch/pull/77042 fixed the data type issue in the new conv-bn folding, but missed the case where the original conv has no bias input. This PR:

- Fixes the new folded conv-bn bias data type: when the conv has no bias but its weight has a lower-precision dtype, the newly generated bias should have the same dtype as the conv's weight.
- Moves the autocast JIT trace UTs from `test_jit.py` to `test_jit_autocast.py`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78241
Approved by: https://github.com/davidberard98
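For context, a minimal sketch of the scenario being fixed, distilled from the `TestJitTraceAutocast` test added below; the `convbn` module and input shape come straight from the PR's test, while the surrounding driver lines are illustrative:

import torch

class convbn(torch.nn.Module):
    def __init__(self, bias_enabled=True):
        super(convbn, self).__init__()
        self.conv = torch.nn.Conv2d(3, 64, 7, stride=2, bias=bias_enabled)
        self.bn = torch.nn.BatchNorm2d(64)

    def forward(self, x):
        return self.bn(self.conv(x))

# Trace under CPU autocast, then freeze: freezing runs the frozen conv-bn
# folding pass. With bias=False the pass must synthesize a bias tensor; before
# this fix it kept the fp32 dtype of the batch-norm stats even when the conv
# weight had been autocast to bfloat16, producing a dtype mismatch.
model = convbn(bias_enabled=False).eval()
x = torch.randn(32, 3, 224, 224, device='cpu')
with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
    traced_model = torch.jit.trace(model, x)
traced_model = torch.jit.freeze(traced_model)
with torch.cpu.amp.autocast(), torch.no_grad():
    y = traced_model(x)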
This commit: 1a41cd8f97 (parent dde56ca329), committed by PyTorch MergeBot.
test/test_jit.py
@@ -71,7 +71,6 @@ from jit.test_aten_pow import TestAtenPow  # noqa: F401
 from jit.test_optimize_for_mobile_preserve_debug_info import TestOptimizeForMobilePreserveDebugInfo  # noqa: F401
 from jit.test_union import TestUnion  # noqa: F401
 from jit.test_legacy_upgraders import TestLegacyUpgraders  # noqa: F401
-from jit.test_models import MnistNet
 from jit.test_batch_mm import TestBatchMM  # noqa: F401
 from jit.test_dtype_analysis import TestDtypeAnalysis, TestDtypeCustomRulesCPU  # noqa: F401
 from jit.test_device_analysis import TestDeviceAnalysis  # noqa: F401
@@ -16197,50 +16196,6 @@ class TestJitGeneratedModule(JitTestCase):
 class TestJitGeneratedFunctional(JitTestCase):
     pass

-
-class TestJitAutocast(JitTestCase):
-    def setUp(self):
-        super(TestJitAutocast, self).setUp()
-        self.models = [MnistNet()]
-        self.inputs = [torch.randn(5, 1, 28, 28, device='cpu')]
-
-    def tearDown(self):
-        super(TestJitAutocast, self).tearDown()
-
-    def test_generate_autocast_jit_trace_model(self):
-        def test_generate_autocast_jit_trace_model(model, x):
-            model.eval()
-            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
-                traced_model = torch.jit.trace(model, x)
-        for i in range(self.models.__len__()):
-            test_generate_autocast_jit_trace_model(self.models[i], self.inputs[i])
-
-    def test_nchw_autocast_jit_trace_model(self):
-        def test_nchw_autocast_jit_trace_model(model, x):
-            model.eval()
-            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
-                traced_model = torch.jit.trace(model, x)
-            with torch.cpu.amp.autocast(), torch.no_grad():
-                y = traced_model(x.clone())
-                y2 = model(x.clone())
-            torch.testing.assert_allclose(y.double(), y2.double(), rtol=1e-03, atol=1e-03)
-        for i in range(self.models.__len__()):
-            test_nchw_autocast_jit_trace_model(self.models[i], self.inputs[i])
-
-    def test_nhwc_autocast_jit_trace_model(self):
-        def test_nhwc_autocast_jit_trace_model(model, x):
-            model.eval()
-            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
-                traced_model = torch.jit.trace(model, x.to(memory_format=torch.channels_last))
-            with torch.cpu.amp.autocast(), torch.no_grad():
-                y = traced_model(x.clone().to(memory_format=torch.channels_last))
-                y2 = model(x.clone().to(memory_format=torch.channels_last))
-            torch.testing.assert_allclose(y.double(), y2.double(), rtol=1e-03, atol=1e-03)
-        for i in range(self.models.__len__()):
-            if self.inputs[i].size().__len__() == 5:
-                # NHWC 3D case not support yet
-                continue
-            test_nhwc_autocast_jit_trace_model(self.models[i], self.inputs[i])
-
 # UBSAN per-function exclusions don't seem to work with OpenMP pragmas,
 # and we have to disable the failing tests here instead.
 UBSAN_DISABLED_TESTS = [
test/test_jit_autocast.py
@@ -9,6 +9,7 @@ from test_jit import JitTestCase
 from torch.testing._internal.common_cuda import TEST_CUDA
 from torch.testing._internal.common_utils import run_tests
 from torch.testing import FileCheck
+from jit.test_models import MnistNet

 TEST_BFLOAT16 = TEST_CUDA and torch.cuda.is_bf16_supported()
@@ -750,5 +751,73 @@ class TestAutocast(JitTestCase):
         g = torch.jit.last_executed_optimized_graph()
         FileCheck().check_not("_autocast_to_reduced").run(g)

+class convbn(torch.nn.Module):
+    def __init__(self, bias_enabled=True):
+        super(convbn, self).__init__()
+        self.conv = torch.nn.Conv2d(3, 64, 7, stride=2, bias=bias_enabled)
+        self.bn = torch.nn.BatchNorm2d(64)
+
+    def forward(self, x):
+        return self.bn(self.conv(x))
+
+class TestJitTraceAutocast(JitTestCase):
+    def setUp(self):
+        super(TestJitTraceAutocast, self).setUp()
+        self.previous_default_dtype = torch.get_default_dtype()
+        torch.set_default_dtype(torch.float32)
+        self.models = [MnistNet(),
+                       convbn(bias_enabled=True),
+                       convbn(bias_enabled=False)]
+        self.inputs = [torch.randn(5, 1, 28, 28, device='cpu'),
+                       torch.randn(32, 3, 224, 224, device='cpu'),
+                       torch.randn(32, 3, 224, 224, device='cpu')]
+        self.previous_jit_autocast_pass = torch._C._jit_set_autocast_mode(False)
+
+    def tearDown(self):
+        torch._C._jit_set_autocast_mode(self.previous_jit_autocast_pass)
+        torch.set_default_dtype(self.previous_default_dtype)
+        super(TestJitTraceAutocast, self).tearDown()
+
+    def test_generate_autocast_jit_trace_model(self):
+        def test_generate_autocast_jit_trace_model(model, x):
+            model.eval()
+            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
+                traced_model = torch.jit.trace(model, x)
+            traced_model = torch.jit.freeze(traced_model)
+        for i in range(self.models.__len__()):
+            test_generate_autocast_jit_trace_model(self.models[i], self.inputs[i])
+
+    def test_nchw_autocast_jit_trace_model(self):
+        def test_nchw_autocast_jit_trace_model(model, x):
+            model.eval()
+            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
+                traced_model = torch.jit.trace(model, x)
+            traced_model = torch.jit.freeze(traced_model)
+            with torch.no_grad():
+                y = traced_model(x.clone())
+            with torch.cpu.amp.autocast(), torch.no_grad():
+                y2 = model(x.clone())
+            torch.testing.assert_allclose(y.double(), y2.double(), rtol=1e-03, atol=1e-03)
+        for i in range(self.models.__len__()):
+            test_nchw_autocast_jit_trace_model(self.models[i], self.inputs[i])
+
+    def test_nhwc_autocast_jit_trace_model(self):
+        def test_nhwc_autocast_jit_trace_model(model, x):
+            model = model.to(memory_format=torch.channels_last)
+            model.eval()
+            with torch.cpu.amp.autocast(cache_enabled=False), torch.no_grad():
+                traced_model = torch.jit.trace(model, x.to(memory_format=torch.channels_last))
+            traced_model = torch.jit.freeze(traced_model)
+            with torch.no_grad():
+                y = traced_model(x.clone().to(memory_format=torch.channels_last))
+            with torch.cpu.amp.autocast(), torch.no_grad():
+                y2 = model(x.clone().to(memory_format=torch.channels_last))
+            torch.testing.assert_allclose(y.double(), y2.double(), rtol=1e-03, atol=1e-03)
+        for i in range(self.models.__len__()):
+            if self.inputs[i].size().__len__() == 5:
+                # NHWC 3D case not support yet
+                continue
+            test_nhwc_autocast_jit_trace_model(self.models[i], self.inputs[i])
+
 if __name__ == "__main__":
     run_tests()
torch/csrc/jit/passes/frozen_conv_folding.cpp
@@ -87,9 +87,7 @@ bool FoldFrozenConvBatchnorm(Block* b) {
       // placeholder have the same type as conv_w.
       at::ScalarType bias_dtype = bn_rm.scalar_type();
      at::ScalarType weight_dtype = conv_w.scalar_type();
-      at::DeviceType weight_device = conv_w.device().type();
-      if (weight_device == at::kCUDA &&
-          (weight_dtype == at::kHalf || weight_dtype == at::kBFloat16) &&
+      if ((weight_dtype == at::kHalf || weight_dtype == at::kBFloat16) &&
          bias_dtype == at::kFloat) {
        bias_dtype = weight_dtype;
      }
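As a hedged Python restatement of the rule the pass now applies (illustrative only, not a real PyTorch API): with the CUDA-only device check removed, a half or bfloat16 conv weight forces the synthesized bias dtype to match the weight on any device, which covers the CPU autocast path exercised by the new tests.

import torch

def folded_bias_dtype(weight_dtype, bn_stats_dtype):
    # Mirrors the C++ logic above: a half/bfloat16 conv weight with fp32
    # batch-norm stats yields a bias in the weight's dtype; otherwise the
    # bias keeps the stats dtype. Hypothetical helper for illustration.
    if weight_dtype in (torch.half, torch.bfloat16) and bn_stats_dtype == torch.float:
        return weight_dtype
    return bn_stats_dtype

assert folded_bias_dtype(torch.bfloat16, torch.float) == torch.bfloat16
assert folded_bias_dtype(torch.float, torch.float) == torch.float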