Disable failing test_int8_woq_mm_cuda on slow grad check (#165147)
Fixes #ISSUE_NUMBER

Failing due to a memory leak, e.g. https://github.com/pytorch/pytorch/actions/runs/18401518298/job/52434584458

```
2025-10-10T11:07:42.9485277Z _ TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16 _
2025-10-10T11:07:42.9485389Z Traceback (most recent call last):
2025-10-10T11:07:42.9485869Z   File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3278, in wrapper
2025-10-10T11:07:42.9485966Z     method(*args, **kwargs)
2025-10-10T11:07:42.9486365Z   File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3278, in wrapper
2025-10-10T11:07:42.9486454Z     method(*args, **kwargs)
2025-10-10T11:07:42.9486849Z   File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 3277, in wrapper
2025-10-10T11:07:42.9486933Z     with policy():
2025-10-10T11:07:42.9487380Z   File "/opt/conda/envs/py_3.10/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 2654, in __exit__
2025-10-10T11:07:42.9487473Z     raise RuntimeError(msg)
2025-10-10T11:07:42.9488533Z RuntimeError: CUDA driver API confirmed a leak in __main__.TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16! Caching allocator allocated memory was 19456 and is now reported as 29184 on device 0. CUDA driver allocated memory was 356712448 and is now 358809600.
2025-10-10T11:07:42.9488543Z
2025-10-10T11:07:42.9488722Z To execute this test, run the following from the base repo dir:
2025-10-10T11:07:42.9489520Z PYTORCH_TEST_CUDA_MEM_LEAK_CHECK=1 PYTORCH_TEST_WITH_SLOW_GRADCHECK=1 python test/inductor/test_cuda_select_algorithm.py TestSelectAlgorithmCudaCUDA.test_int8_woq_mm_cuda_batch_size_32_mid_dim_8_in_features_144_out_features_65_cuda_bfloat16
2025-10-10T11:07:42.9489525Z
2025-10-10T11:07:42.9489748Z This message can be suppressed by setting PYTORCH_PRINT_REPRO_ON_FAILURE=0
```

The test was added in #161680.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165147
Approved by: https://github.com/bbeckca
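For context, the leak checker enabled by `PYTORCH_TEST_CUDA_MEM_LEAK_CHECK=1` works by comparing CUDA allocator statistics before and after each test. The sketch below illustrates that idea only; it is not the actual harness code (which lives in `torch.testing._internal.common_utils`), and `check_cuda_leak` is a hypothetical helper name.

```python
import gc

import torch


def check_cuda_leak(test_fn, device=0):
    """Minimal sketch of a before/after CUDA memory comparison.

    Illustrates the idea behind PYTORCH_TEST_CUDA_MEM_LEAK_CHECK;
    the real harness in common_utils also checks driver-level memory.
    """
    gc.collect()
    torch.cuda.synchronize(device)
    # Bytes currently held by the caching allocator on this device.
    before = torch.cuda.memory_allocated(device)

    test_fn()

    gc.collect()
    torch.cuda.synchronize(device)
    after = torch.cuda.memory_allocated(device)

    if after > before:
        raise RuntimeError(
            f"possible CUDA leak: caching allocator went from "
            f"{before} to {after} bytes on device {device}"
        )
```

In the failing job above, exactly this kind of before/after delta was reported (19456 vs. 29184 bytes in the caching allocator), which is why the test is being skipped under slow gradcheck rather than left flaky.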
committed by: PyTorch MergeBot
parent: 4f8a986b8f
commit: 0055f07997
```diff
test/inductor/test_cuda_select_algorithm.py

@@ -17,7 +17,11 @@ from torch.testing._internal.common_device_type import (
 from torch.testing._internal.common_quantized import (
     _calculate_dynamic_per_channel_qparams,
 )
-from torch.testing._internal.common_utils import parametrize, TEST_CUDA
+from torch.testing._internal.common_utils import (
+    parametrize,
+    TEST_CUDA,
+    TEST_WITH_SLOW_GRADCHECK,
+)


 try:
@@ -79,6 +83,7 @@ class TestSelectAlgorithmCuda(BaseTestSelectAlgorithm):
     @parametrize("in_features", (128, 144, 1024))
     @parametrize("out_features", (64, 65, 1024))
     @unittest.skipIf(not TEST_CUDA, "CUDA not available")
+    @unittest.skipIf(TEST_WITH_SLOW_GRADCHECK, "Leaking memory")
     def test_int8_woq_mm_cuda(
         self, dtype, batch_size, mid_dim, in_features, out_features
     ):
```
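As the repro command above shows, the slow-gradcheck configuration is selected via the `PYTORCH_TEST_WITH_SLOW_GRADCHECK` environment variable, and `TEST_WITH_SLOW_GRADCHECK` exposes that setting to tests. The following is a self-contained sketch of the same skip pattern the diff applies; the flag derivation and the `ExampleTest` class are illustrative assumptions, not PyTorch's actual definitions.

```python
import os
import unittest

import torch

# Assumed stand-ins for the flags imported from
# torch.testing._internal.common_utils; the exact parsing there may differ.
TEST_CUDA = torch.cuda.is_available()
TEST_WITH_SLOW_GRADCHECK = os.getenv("PYTORCH_TEST_WITH_SLOW_GRADCHECK") == "1"


class ExampleTest(unittest.TestCase):  # hypothetical test class
    @unittest.skipIf(not TEST_CUDA, "CUDA not available")
    @unittest.skipIf(TEST_WITH_SLOW_GRADCHECK, "Leaking memory")
    def test_int8_woq_mm_cuda_like(self):
        # Stand-in body: under slow gradcheck the decorator above skips
        # the test before any CUDA memory is allocated, so the leak
        # checker never trips on it.
        a = torch.randn(8, 8, device="cuda", dtype=torch.bfloat16)
        b = torch.randint(-128, 127, (8, 8), device="cuda", dtype=torch.int8)
        out = a @ b.to(torch.bfloat16)
        self.assertEqual(out.shape, (8, 8))
```

Skipping at the decorator level, rather than returning early inside the test, keeps the run reported as SKIPPED instead of PASSED, so the exclusion stays visible in CI results.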