From ac568fc0077748264538fda9ce99c856b7ca944a Mon Sep 17 00:00:00 2001
From: eqy
Date: Mon, 3 Jun 2024 22:42:02 +0000
Subject: [PATCH] [CUDNN] Remove defunct cuDNN V8 API build flag (#120006)

The flag basically does nothing following #95722. Let's see if the
quantization tests break.

CC @malfet @atalman

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120006
Approved by: https://github.com/malfet
---
 cmake/Summary.cmake                         |  1 -
 defs.bzl                                    |  1 -
 test/quantization/core/test_quantized_op.py | 49 ++++++++++-----------
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index 99b6521328d6..bc15f70ad1f5 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -71,7 +71,6 @@ function(caffe2_print_configuration_summary)
     message(STATUS "    Split CUDA                   : ${BUILD_SPLIT_CUDA}")
     message(STATUS "    CUDA static link             : ${CAFFE2_STATIC_LINK_CUDA}")
     message(STATUS "    USE_CUDNN                    : ${USE_CUDNN}")
-    message(STATUS "    USE_EXPERIMENTAL_CUDNN_V8_API: ${USE_EXPERIMENTAL_CUDNN_V8_API}")
     message(STATUS "    USE_CUSPARSELT               : ${USE_CUSPARSELT}")
     message(STATUS "    CUDA version                 : ${CUDA_VERSION}")
     message(STATUS "    USE_FLASH_ATTENTION          : ${USE_FLASH_ATTENTION}")
diff --git a/defs.bzl b/defs.bzl
index 6ea4b1219325..5e8923556af0 100644
--- a/defs.bzl
+++ b/defs.bzl
@@ -33,7 +33,6 @@ default_compiler_flags = [
    "-DTH_INDEX_BASE=0",
    "-DMAGMA_V2",
    "-DNO_CUDNN_DESTROY_HANDLE",
-   "-DUSE_EXPERIMENTAL_CUDNN_V8_API",  # enable cudnn v8 api
    "-DUSE_FBGEMM",
    "-DUSE_PYTORCH_QNNPACK",
    # The dynamically loaded NVRTC trick doesn't work in fbcode,
diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py
index d59f1fffd926..6671b6634e00 100644
--- a/test/quantization/core/test_quantized_op.py
+++ b/test/quantization/core/test_quantized_op.py
@@ -21,6 +21,7 @@ from hypothesis import strategies as st
 import torch.testing._internal.hypothesis_utils as hu
 hu.assert_deadline_disabled()
 
+from torch.testing._internal.common_cuda import SM80OrLater
 from torch.testing._internal.common_utils import TestCase
 from torch.testing._internal.common_utils import IS_PPC, TEST_WITH_UBSAN, IS_MACOS, BUILD_WITH_CAFFE2, IS_SANDCASTLE
 from torch.testing._internal.common_quantization import skipIfNoFBGEMM, skipIfNoQNNPACK, skipIfNoONEDNN
@@ -31,10 +32,12 @@ from torch.testing._internal.common_quantized import (
     qengine_is_onednn,
 )
 from torch.ao.quantization import PerChannelMinMaxObserver
-from torch.testing._internal.common_cuda import TEST_CUDNN, TEST_CUDA
+from torch.testing._internal.common_cuda import TEST_CUDNN, TEST_CUDNN_VERSION, TEST_CUDA
 from torch.testing._internal.optests import opcheck
 import torch.backends.xnnpack
 
+from torch.utils.cpp_extension import ROCM_HOME
+
 from typing import Optional
 
 np_dtype = {
@@ -43,6 +46,8 @@ np_dtype = {
     torch.qint32 : np.int32
 }
 
+TEST_ROCM = TEST_CUDA and torch.version.hip is not None and ROCM_HOME is not None
+
 class PointwisePostOp(NamedTuple):
     binary_attr : str = "none"
     alpha : float = 1.0
@@ -905,9 +910,8 @@ class TestQuantizedOps(TestCase):
     """Tests the correctness of the cudnn add and add_relu op
     (Similar to test_qadd_relu_different_qparams, will probably merge in the future)"""
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the test_qadd_relu_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
supported on rocm.") def test_qadd_relu_cudnn(self): dtype = torch.qint8 add_relu = torch.ops.quantized.add_relu @@ -940,9 +944,8 @@ class TestQuantizedOps(TestCase): """Tests the correctness of the cudnn add and add_relu op for nhwc format""" @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.") - @unittest.skip("Local only - currently the test_qadd_relu_cudnn_nhwc op is bulid " - "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test " - "after it is built by default") + @unittest.skipIf(not SM80OrLater, "requires sm80 or later.") + @unittest.skipIf(TEST_ROCM, "not supported on rocm.") def test_qadd_relu_cudnn_nhwc(self): dtype = torch.qint8 add_relu = torch.ops.quantized.add_relu @@ -1379,7 +1382,7 @@ class TestQuantizedOps(TestCase): self.assertEqual(a_ref, a_hat.dequantize(), msg="ops.quantized.max_pool1d results are off") - # TODO: merge this test with test_max_pool2d when USE_EXPERIMENTAL_CUDNN_V8_API flag is enabled in CI + # TODO: merge this test with test_max_pool2d """Tests 2D cudnn max pool operation on quantized tensors.""" @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4, min_side=1, max_side=10), @@ -1394,9 +1397,8 @@ class TestQuantizedOps(TestCase): padding=st.integers(0, 2), ceil_mode=st.booleans()) @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.") - @unittest.skip("Local only - currently the qconv2d_cudnn op is bulid " - "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test " - "after it is built by default") + @unittest.skipIf(TEST_CUDNN_VERSION <= 90100, "cuDNN maxpool2d mishandles -128 before v90100") + @unittest.skipIf(TEST_ROCM, "not supported on rocm.") def test_max_pool2d_cudnn(self, X, kernel, stride, dilation, padding, ceil_mode): X, (scale, zero_point, torch_type) = X assume(kernel // 2 >= padding) # Kernel cannot be overhanging! 
@@ -4050,9 +4052,8 @@ class TestQuantizedLinear(TestCase):
            use_channelwise=st.sampled_from([False]))  # channelwise currently not supported for qlinear cudnn
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the qlinear_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     # TODO: check with yang regarding CUDNN flags
     def test_qlinear_cudnn(self, batch_size, input_channels, output_channels, use_bias,
                            use_relu, use_multi_dim_input, use_channelwise):
@@ -5427,9 +5428,8 @@ class TestQuantizedConv(TestCase):
            use_channelwise=st.sampled_from([False]))
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the qconv2d_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     def test_qconv2d_cudnn(
             self,
             batch_size,
@@ -5510,9 +5510,8 @@ class TestQuantizedConv(TestCase):
            use_channelwise=st.sampled_from([False]))
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the qconv2d_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     def test_qconv2d_relu_cudnn(
             self,
             batch_size,
@@ -6245,9 +6244,8 @@ class TestQuantizedConv(TestCase):
            use_channelwise=st.sampled_from([False]))
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the qconv1d_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     def test_qconv1d_cudnn(
             self,
             batch_size,
@@ -6319,9 +6317,8 @@ class TestQuantizedConv(TestCase):
            use_channelwise=st.sampled_from([False]))
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skip("Local only - currently the qconv1d_cudnn op is bulid "
-                   "with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
-                   "after it is built by default")
+    @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
+    @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     def test_qconv1d_relu_cudnn(
             self,
             batch_size,
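
Note (reviewer aid, not part of the patch): the unconditional `@unittest.skip` decorators removed above are replaced by runtime gates, so whether the cuDNN quantized-op tests run now depends only on the build. The sketch below mirrors those gates using the same flags the diff imports; the standalone `probe()` helper and its printouts are illustrative assumptions, not PyTorch API.

```python
# Minimal sketch: reports which of the patch's skip conditions would fire
# on the current build. Flags come from the same modules the diff imports.
import torch
from torch.testing._internal.common_cuda import (
    TEST_CUDA, TEST_CUDNN, TEST_CUDNN_VERSION, SM80OrLater,
)
from torch.utils.cpp_extension import ROCM_HOME

# Same definition the patch adds to test_quantized_op.py.
TEST_ROCM = TEST_CUDA and torch.version.hip is not None and ROCM_HOME is not None

def probe() -> None:
    # Hypothetical helper: each branch corresponds to one decorator above.
    if not TEST_CUDNN:
        print("skip all cudnn tests: cudnn is not enabled.")
    elif TEST_ROCM:
        print("skip all cudnn tests: not supported on rocm.")
    elif not SM80OrLater:
        # Gate on the qadd/qlinear/qconv cudnn tests.
        print("skip qadd/qlinear/qconv: requires sm80 or later.")
    elif TEST_CUDNN_VERSION <= 90100:
        # Extra gate on test_max_pool2d_cudnn only.
        print("skip maxpool2d: cuDNN maxpool2d mishandles -128 before v90100")
    else:
        print("cuDNN quantized-op tests are eligible on this build.")

if __name__ == "__main__":
    probe()
```

On a ROCm build this should print the rocm skip, matching the new `@unittest.skipIf(TEST_ROCM, ...)` behavior; on a pre-sm80 CUDA build, the sm80 skip.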