mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Disable test_fs family for dynamo (#91459)
This should help address https://github.com/pytorch/pytorch/issues/67002. At the end of these tests, any temp files `/dev/shm/torch_*` are cleaned up, but somehow that cleanup might take longer than 0.5s, causing the test to fail. So this PR increases the maximum waiting time to 5s while still polling for the result every 0.5s as before. ### Testing `pytest test_multiprocessing.py -k test_fs --verbose --flake-finder` to run `test_fs`, `test_fs_is_shared`, `test_fs_pool`, `test_fs_preserve_sharing`, and `test_fs_sharing` 50 times on a dynamo shard. All pass. Pull Request resolved: https://github.com/pytorch/pytorch/pull/91459 Approved by: https://github.com/kit1980, https://github.com/ZainRizvi, https://github.com/atalman
This commit is contained in:
@ -15,7 +15,7 @@ import torch.multiprocessing as mp
|
||||
import torch.utils.hooks
|
||||
from torch.nn import Parameter
|
||||
from torch.testing._internal.common_utils import (TestCase, run_tests, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN, TEST_WITH_ASAN,
|
||||
load_tests, slowTest, TEST_WITH_TSAN)
|
||||
load_tests, slowTest, TEST_WITH_TSAN, TEST_WITH_TORCHDYNAMO)
|
||||
|
||||
# load_tests from common_utils is used to automatically filter tests for
|
||||
# sharding on sandcastle. This line silences flake warnings
|
||||
@ -23,6 +23,7 @@ load_tests = load_tests
|
||||
|
||||
# How many times the sharing tests repeat their body to stress shm cleanup.
TEST_REPEATS = 30
# Whether the platform exposes POSIX shared memory as files under /dev/shm
# (the only situation in which leaked shm files can be detected by listing).
HAS_SHM_FILES = os.path.isdir('/dev/shm')
# Upper bound (seconds) on how long has_shm_files() keeps polling for
# /dev/shm/torch_* files to disappear before declaring a leak.
MAX_WAITING_TIME_IN_SECONDS = 5
# CUDA IPC tensor sharing requires a CUDA device and is unsupported on
# macOS and Windows.
TEST_CUDA_IPC = (
    torch.cuda.is_available()
    and sys.platform != 'darwin'
    and sys.platform != 'win32'
)
|
||||
@ -219,10 +220,19 @@ class leak_checker(object):
|
||||
def has_shm_files(self, wait=True):
|
||||
if not HAS_SHM_FILES:
|
||||
return False
|
||||
|
||||
result = self._has_shm_files()
|
||||
if result and mp.get_sharing_strategy() == 'file_system' and wait:
|
||||
time.sleep(0.5)
|
||||
return self._has_shm_files()
|
||||
if not result or mp.get_sharing_strategy() != 'file_system' or not wait:
|
||||
return result
|
||||
|
||||
total_waiting_time = 0
|
||||
waiting_time = 0.5
|
||||
|
||||
while total_waiting_time <= MAX_WAITING_TIME_IN_SECONDS and result:
|
||||
time.sleep(waiting_time)
|
||||
total_waiting_time += waiting_time
|
||||
result = self._has_shm_files()
|
||||
|
||||
return result
|
||||
|
||||
def _has_shm_files(self):
|
||||
@ -342,19 +352,27 @@ class TestMultiprocessing(TestCase):
|
||||
|
||||
@unittest.skipIf(TEST_WITH_ASAN,
|
||||
"seems to hang with ASAN, see https://github.com/pytorch/pytorch/issues/5326")
|
||||
@unittest.skipIf(TEST_WITH_TORCHDYNAMO,
|
||||
"Fail to clean up temporary /dev/shm/torch_* file, see https://github.com/pytorch/pytorch/issues/91467")
|
||||
def test_fs_sharing(self):
|
||||
with fs_sharing():
|
||||
self._test_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(TEST_WITH_TORCHDYNAMO,
|
||||
"Fail to clean up temporary /dev/shm/torch_* file, see https://github.com/pytorch/pytorch/issues/91467")
|
||||
def test_fs_preserve_sharing(self):
|
||||
with fs_sharing():
|
||||
self._test_preserve_sharing(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(TEST_WITH_TORCHDYNAMO,
|
||||
"Fail to clean up temporary /dev/shm/torch_* file, see https://github.com/pytorch/pytorch/issues/91467")
|
||||
def test_fs_pool(self):
|
||||
with fs_sharing():
|
||||
self._test_pool(repeat=TEST_REPEATS)
|
||||
|
||||
@unittest.skipIf(not HAS_SHM_FILES, "don't not how to check if shm files exist")
|
||||
@unittest.skipIf(TEST_WITH_TORCHDYNAMO,
|
||||
"Fail to clean up temporary /dev/shm/torch_* file, see https://github.com/pytorch/pytorch/issues/91467")
|
||||
def test_fs(self):
|
||||
def queue_put():
|
||||
x = torch.DoubleStorage(4)
|
||||
|
Reference in New Issue
Block a user