Disable failing test_int8_woq_mm_cuda on slow grad check (#165147)

The test is failing under the slow-gradcheck configuration because the CUDA memory leak check detects a leak, e.g. https://github.com/pytorch/pytorch/actions/runs/18401518298/job/52434584458

```
2025-10-10T11:07:42.9485277Z _ TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16 _
2025-10-10T11:07:42.9485389Z Traceback (most recent call last):
2025-10-10T11:07:42.9485869Z File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3278, in wrapper
2025-10-10T11:07:42.9485966Z method(*args, **kwargs)
2025-10-10T11:07:42.9486365Z File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3278, in wrapper
2025-10-10T11:07:42.9486454Z method(*args, **kwargs)
2025-10-10T11:07:42.9486849Z File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3277, in wrapper
2025-10-10T11:07:42.9486933Z with policy():
2025-10-10T11:07:42.9487380Z File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 2654, in __exit__
2025-10-10T11:07:42.9487473Z raise RuntimeError(msg)
2025-10-10T11:07:42.9488533Z RuntimeError: CUDA driver API confirmed a leak in __main__.TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16! Caching allocator allocated memory was 19456 and is now reported as 29184 on device 0. CUDA driver allocated memory was 356712448 and is now 358809600.
2025-10-10T11:07:42.9488543Z
2025-10-10T11:07:42.9488722Z To execute this test, run the following from the base repo dir:
2025-10-10T11:07:42.9489520Z PYTORCH_TEST_CUDA_MEM_LEAK_CHECK=1 PYTORCH_TEST_WITH_SLOW_GRADCHECK=1 python test/inductor/test_cuda_select_algorithm.py TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16
2025-10-10T11:07:42.9489525Z
2025-10-10T11:07:42.9489748Z This message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0
```

The test was added in #161680.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165147
Approved by: https://github.com/bbeckca
2025-10-20 21:14:14 +08:00 · 2025-10-10 20:26:31 +00:00
parent 4f8a986b8f
commit 0055f07997
1 changed files with 6 additions and 1 deletions
--- a/test/inductor/test_cuda_select_algorithm.py
+++ b/test/inductor/test_cuda_select_algorithm.py
@@ -17,7 +17,11 @@ from torch.testing._internal.common_device_type import (
 from torch.testing._internal.common_quantized import (
    _calculate_dynamic_per_channel_qparams,
 )
-from torch.testing._internal.common_utils import parametrize, TEST_CUDA
+from torch.testing._internal.common_utils import (
+    parametrize,
+    TEST_CUDA,
+    TEST_WITH_SLOW_GRADCHECK,
+)


 try:
@@ -79,6 +83,7 @@ class TestSelectAlgorithmCuda(BaseTestSelectAlgorithm):
    @parametrize("in_features", (128, 144, 1024))
    @parametrize("out_features", (64, 65, 1024))
    @unittest.skipIf(not TEST_CUDA, "CUDA not available")
+    @unittest.skipIf(TEST_WITH_SLOW_GRADCHECK, "Leaking memory")
    def test_int8_woq_mm_cuda(
        self, dtype, batch_size, mid_dim, in_features, out_features
    ):