Run tests in USE_PYTEST_LIST through run_tests (#95659)

Part of my effort to move everything to pytest and decrease the number of test-runner frameworks in CI.

Produces XML test reports, but they might look a bit weird because these are module-level tests rather than tests in classes.
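
For context, the two styles show up differently in the report's classname attribute; a minimal sketch with hypothetical file and test names:

# test_example.py (hypothetical) -- under pytest's junit XML output, a
# module-level test is reported roughly as
#   <testcase classname="test_example" name="test_add" ...>
# while a class-based test gets classname="test_example.TestAdd", so tooling
# that expects one shape may render the other oddly.
def test_add():
    assert 1 + 1 == 2


class TestAdd:
    def test_add_in_class(self):
        assert 1 + 1 == 2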

Doesn't wire up the skip/disable-test infra because that is tied to classes. (For future reference, we could either put the tests in classes or move the check_if_enable logic into a pytest hook.)
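
A minimal sketch of the hook option (pytest_collection_modifyitems is real pytest API, and check_if_enable lives in torch.testing._internal.common_utils, but the adapter wiring below is an assumption, not something this PR implements):

# conftest.py (hypothetical sketch)
import unittest

import pytest

from torch.testing._internal.common_utils import check_if_enable


def pytest_collection_modifyitems(config, items):
    for item in items:
        try:
            # check_if_enable expects a unittest.TestCase-shaped object, so a
            # small adapter exposing the test's name/class would be needed for
            # module-level test functions.
            check_if_enable(item)  # hypothetical adapter call
        except unittest.SkipTest as e:
            item.add_marker(pytest.mark.skip(reason=str(e)))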

Tested in CI and verified that the same number of tests is run.
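
One way to spot-check the counts locally (a hypothetical sanity check, not what CI itself does; the test path is illustrative):

import pytest

# Collect without running and compare the reported item count before and
# after the change.
pytest.main(["--collect-only", "-q", "distributed/pipeline/sync/skip/test_api.py"])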

Pull Request resolved: https://github.com/pytorch/pytorch/pull/95659
Approved by: https://github.com/huydhn
Catherine Lee
2023-02-28 22:09:01 +00:00
committed by PyTorch MergeBot
parent e5b9d98752
commit e3c5c369ba
29 changed files with 140 additions and 29 deletions

View File

@@ -11,3 +11,5 @@ addopts =
 testpaths =
     test
 junit_logging_reruns = all
+filterwarnings =
+    ignore:Module already imported so cannot be rewritten.*hypothesis:pytest.PytestAssertRewriteWarning

View File

@@ -9,7 +9,6 @@
 import json
 import logging
-import unittest
 from dataclasses import asdict
 from unittest.mock import patch
@@ -21,10 +20,10 @@ from torch.distributed.elastic.events import (
     _get_or_create_logger,
     construct_and_record_rdzv_event,
 )
-from torch.testing._internal.common_utils import run_tests
+from torch.testing._internal.common_utils import run_tests, TestCase


-class EventLibTest(unittest.TestCase):
+class EventLibTest(TestCase):
     def assert_event(self, actual_event, expected_event):
         self.assertEqual(actual_event.name, expected_event.name)
         self.assertEqual(actual_event.source, expected_event.source)
@@ -59,7 +58,7 @@ class EventLibTest(unittest.TestCase):
         deser_event = Event.deserialize(json_event)
         self.assert_event(event, deser_event)

-class RdzvEventLibTest(unittest.TestCase):
+class RdzvEventLibTest(TestCase):
     @patch("torch.distributed.elastic.events.record_rdzv_event")
     @patch("torch.distributed.elastic.events.get_logging_handler")
     def test_construct_and_record_rdzv_event(self, get_mock, record_mock):

View File

@@ -11,6 +11,7 @@ import copy
 from torch import nn
 from torch.distributed.pipeline.sync.skip import Namespace, skippable, stash
+from torch.testing._internal.common_utils import run_tests


 def test_namespace_difference():
@@ -45,3 +46,7 @@ def test_skippable_repr():
     ))
     """.strip()
     )
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -14,6 +14,7 @@ from torch.distributed.pipeline.sync import Pipe
 from torch.distributed.pipeline.sync.skip import pop, skippable, stash
 from torch.distributed.pipeline.sync.skip.portal import PortalBlue, PortalCopy, PortalOrange
 from torch.distributed.pipeline.sync.utils import partition_model
+from torch.testing._internal.common_utils import run_tests


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -108,3 +109,7 @@ def test_none_skip(setup_rpc):
     output.local_value().sum().backward()
     assert input.grad.mean().item() == 1
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -10,6 +10,7 @@ from torch import nn
 from torch.distributed.pipeline.sync.skip import Namespace, pop, skippable, stash
 from torch.distributed.pipeline.sync.skip.layout import inspect_skip_layout
+from torch.testing._internal.common_utils import run_tests


 class Pass(nn.Module):
@@ -111,3 +112,7 @@ def test_namespace():
     # p3 pops 'bar' before 'foo', but the plan is sorted by source partition index.
     assert policy == [[], [], [(0, ns1, "foo"), (1, ns2, "foo")]]
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -13,6 +13,7 @@ from torch import nn
 from torch.distributed.pipeline.sync import Pipe, is_checkpointing, is_recomputing
 from torch.distributed.pipeline.sync.skip import pop, skippable, stash
 from torch.distributed.pipeline.sync.skip.tracker import current_skip_tracker
+from torch.testing._internal.common_utils import run_tests


 @skippable(stash=["skip"])
@@ -126,3 +127,7 @@ def test_no_portal_without_pipe(train, monkeypatch, setup_rpc):
     model.eval()
     with torch.no_grad():
         model(input)
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -12,6 +12,7 @@ import torch
 from torch.distributed.pipeline.sync.dependency import fork, join
 from torch.distributed.pipeline.sync.skip.portal import Portal
 from torch.distributed.pipeline.sync.stream import default_stream
+from torch.testing._internal.common_utils import run_tests


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -155,3 +156,7 @@ class TestTensorLife:
         another_tensor = torch.rand(1, requires_grad=True)
         portal.put_tensor(another_tensor, tensor_life=1)
         portal.blue()
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -12,6 +12,7 @@ from torch import nn
 from torch.distributed.pipeline.sync.skip import pop, skippable, stash
 from torch.distributed.pipeline.sync.skip.tracker import SkipTracker, use_skip_tracker
+from torch.testing._internal.common_utils import run_tests


 @pytest.fixture(autouse=True)
@@ -136,3 +137,7 @@ def test_stash_none():
     l1 = Stash()
     l1(torch.tensor(42))
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -18,6 +18,7 @@ from torch.distributed.pipeline.sync.microbatch import Batch
 from torch.distributed.pipeline.sync.skip import pop, skippable, stash
 from torch.distributed.pipeline.sync.skip.layout import SkipLayout
 from torch.distributed.pipeline.sync.skip.tracker import SkipTracker, SkipTrackerThroughPotals, current_skip_tracker
+from torch.testing._internal.common_utils import run_tests


 def test_default_skip_tracker():
@@ -127,3 +128,7 @@ def test_tensor_life_with_checkpointing():
         with enable_recomputing():
             skip_tracker.save(batch, None, "test", tensor)
     assert skip_tracker.portals[(None, "test")].tensor_life == 0
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -10,6 +10,7 @@ import pytest
 from torch import nn
 from torch.distributed.pipeline.sync.skip import Namespace, skippable, verify_skippables
+from torch.testing._internal.common_utils import run_tests


 def test_matching():
@@ -152,3 +153,7 @@ def test_double_stash_pop_but_isolated():
     verify_skippables(
         nn.Sequential(Layer1().isolate(ns1), Layer2().isolate(ns1), Layer3().isolate(ns2), Layer4().isolate(ns2),)
     )
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -14,6 +14,7 @@ from torch import nn
 from torch.distributed.pipeline.sync._balance import balance_by_size, balance_by_time, blockpartition
 from torch.distributed.pipeline.sync._balance.profile import layerwise_sandbox
+from torch.testing._internal.common_utils import run_tests


 skip_if_no_cuda = pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -223,3 +224,7 @@ def test_already_has_grad():
     with pytest.raises(ValueError, match="some parameter already has gradient"):
         balance_by_time(1, model, sample, device="cpu")
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -12,6 +12,7 @@ from torch import nn
 import torch.nn.functional as F
 from torch.distributed.pipeline.sync import Pipe
+from torch.testing._internal.common_utils import run_tests


 def test_python_autograd_function(setup_rpc):
@@ -137,3 +138,7 @@ def test_parallel_randoms(setup_rpc):
     y.norm().backward()
     assert y.to(torch.bool).tolist() == x.grad.to(torch.bool).tolist()
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -16,6 +16,7 @@ import torch.cuda
 from torch.distributed.pipeline.sync.checkpoint import Checkpointing, checkpoint, is_checkpointing, is_recomputing
 from torch.distributed.pipeline.sync.dependency import fork, join
 from torch.distributed.pipeline.sync.microbatch import Batch
+from torch.testing._internal.common_utils import run_tests

 devices = ["cpu"]
 if torch.cuda.is_available():
@@ -158,3 +159,7 @@ def test_non_grad_output():
     output = checkpoint(model, input)
     output[0].backward()
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -11,6 +11,7 @@ import torch
 from torch.distributed.pipeline.sync.copy import Copy, Wait
 from torch.distributed.pipeline.sync.stream import CPUStream, current_stream, get_device, is_cuda, new_stream, use_stream
+from torch.testing._internal.common_utils import run_tests


 skip_if_no_cuda = pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -68,3 +69,7 @@ def test_wait_multiple_tensors():
     assert a.grad_fn is b.grad_fn
     assert a.grad_fn.__class__ is Wait._backward_cls
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -14,6 +14,7 @@ import torch
 from torch import nn, optim
 from torch.distributed.pipeline.sync.batchnorm import DeferredBatchNorm
+from torch.testing._internal.common_utils import run_tests


 CHUNKS = 4
@@ -192,3 +193,7 @@ def test_input_requiring_grad():
     assert not dbn.sum.requires_grad
     assert dbn.sum.grad_fn is None
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -12,6 +12,7 @@ import pytest
 import torch
 from torch.distributed.pipeline.sync.dependency import Fork, Join, fork, join
+from torch.testing._internal.common_utils import run_tests


 @pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -144,3 +145,7 @@ def test_join_when_fork_requires_grad():
     assert not b.requires_grad
     b = join(b, p)
     assert b.requires_grad
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -11,6 +11,7 @@ import torch
 from torch import nn
 from torch.distributed.pipeline.sync import Pipe
+from torch.testing._internal.common_utils import run_tests


 def test_inplace_on_requires_grad(setup_rpc):
@@ -71,3 +72,7 @@ def test_inplace_incorrect_grad(setup_rpc):
     # The gradient of 'foo' should be 2, but it is 3 actually because
     # bar.add_(1) was executed twice due to checkpointing.
     assert foo.grad.item() == 2.0
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -11,6 +11,7 @@ import torch
 import torch.cuda
 from torch.distributed.pipeline.sync.microbatch import Batch, check, gather, scatter
+from torch.testing._internal.common_utils import run_tests


 def test_batch_atomic():
@@ -140,3 +141,7 @@ def test_scatter_multiple_tensors():
     assert list(b)[0].size() == (1, 1)
     assert list(a)[1].size() == (2, 2)
     assert list(b)[1].size() == (2, 2)
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -9,6 +9,7 @@
 import torch
 from torch.distributed.pipeline.sync.phony import get_phony
+from torch.testing._internal.common_utils import run_tests


 def test_phony_size():
@@ -50,3 +51,7 @@ def test_phony_in_autograd_function():
     assert p1 is not p2
     assert p1.grad_fn is not None
     assert p2.grad_fn is None
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -18,6 +18,7 @@ from torch import Tensor
 from torch.distributed.pipeline.sync import Pipe, NoChunk, WithDevice
 from torch.distributed.pipeline.sync.pipe import PipeSequential
+from torch.testing._internal.common_utils import run_tests


 skip_if_no_cuda = pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -819,3 +820,7 @@ def test_with_device_wrapper(setup_rpc):
     assert torch.device('cuda:0') == model(torch.rand(16, 16).cuda(0)).local_value().device
     assert [torch.device('cuda:0')] == model.devices
     assert torch.device('cuda:0') == fc2.weight.device
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -7,6 +7,7 @@
 # This source code is licensed under the BSD license found in the
 # LICENSE file in the root directory of this source tree.
 from torch.distributed.pipeline.sync.pipeline import _clock_cycles
+from torch.testing._internal.common_utils import run_tests


 def test_clock_cycles():
@@ -29,3 +30,7 @@ def test_clock_cycles():
         [(3, 0), (2, 1)],
         [(3, 1)],
     ]
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -21,6 +21,7 @@ from torch.distributed.pipeline.sync.stream import (
     use_stream,
     wait_stream,
 )
+from torch.testing._internal.common_utils import run_tests


 skip_if_no_cuda = pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda required")
@@ -188,3 +189,7 @@ class TestRecordStream:
         with torch.cuda.stream(stream_alloc):
             z = torch.rand(2, device=torch.device("cuda"))
         assert z.data_ptr() != data_ptr
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -10,6 +10,7 @@ import torch
 from torch import nn
 from torch.distributed.pipeline.sync import Pipe
+from torch.testing._internal.common_utils import run_tests


 def test_simple_linears(setup_rpc):
@@ -43,3 +44,7 @@ def test_simple_linears(setup_rpc):
     # Both grads should be identical.
     assert torch.allclose(grad_with_pipe, grad_without_pipe)
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -14,6 +14,7 @@ import torch
 from torch.distributed.pipeline.sync.microbatch import Batch
 from torch.distributed.pipeline.sync.stream import CPUStream
 from torch.distributed.pipeline.sync.worker import Task, spawn_workers
+from torch.testing._internal.common_utils import run_tests


 class fake_device:
@@ -109,3 +110,7 @@ def test_worker_per_device():
     # 3: fake1, 4: fake2
     assert in_queues[3] is not in_queues[4]
     assert out_queues[3] is not out_queues[4]
+
+
+if __name__ == "__main__":
+    run_tests()

View File

@@ -5,6 +5,7 @@ import pytest
 import torch
 from torch.distributions import biject_to, constraints, transform_to
 from torch.testing._internal.common_cuda import TEST_CUDA
+from torch.testing._internal.common_utils import run_tests


 EXAMPLES = [
@@ -124,5 +125,5 @@ def test_transform_to(constraint_fn, args, is_cuda):
     assert torch.allclose(y, y2), "Error in transform_to({}) pseudoinverse".format(constraint)


-if __name__ == '__main__':
-    pytest.main([__file__])
+if __name__ == "__main__":
+    run_tests()

View File

@@ -17,6 +17,7 @@ from torch.distributions.transforms import (AbsTransform, AffineTransform, Compo
                                             identity_transform, Transform, _InverseTransform,
                                             PositiveDefiniteTransform)
 from torch.distributions.utils import tril_matrix_to_vec, vec_to_tril_matrix
+from torch.testing._internal.common_utils import run_tests


 def get_transforms(cache_size):
@@ -494,5 +495,5 @@ def test_save_load_transform():
     assert torch.allclose(log_prob, other.log_prob(x))


-if __name__ == '__main__':
-    pytest.main([__file__])
+if __name__ == "__main__":
+    run_tests()

View File

@@ -4,7 +4,7 @@ import pytest
 import torch
 from torch.distributions.utils import tril_matrix_to_vec, vec_to_tril_matrix
+from torch.testing._internal.common_utils import run_tests


 @pytest.mark.parametrize('shape', [
     (2, 2),
@@ -22,5 +22,5 @@ def test_tril_matrix_to_vec(shape):
     assert torch.allclose(tril_mat, actual)


-if __name__ == '__main__':
-    pytest.main([__file__])
+if __name__ == "__main__":
+    run_tests()

View File

@@ -193,7 +193,7 @@ USE_PYTEST_LIST = [
     "distributed/elastic/events/lib_test",
     "distributed/elastic/agent/server/test/api_test",
     "test_deploy",
-    "distributed/test_c10d_error_logger.py"
+    "distributed/test_c10d_error_logger"
 ]

 WINDOWS_BLOCKLIST = [
@@ -428,18 +428,11 @@ def print_to_stderr(message):
     print(message, file=sys.stderr)


-def get_executable_command(options, allow_pytest, disable_coverage=False):
+def get_executable_command(options, disable_coverage=False):
     if options.coverage and not disable_coverage:
         executable = ["coverage", "run", "--parallel-mode", "--source=torch"]
     else:
         executable = [sys.executable, "-bb"]
-    if options.pytest:
-        if allow_pytest:
-            executable += ["-m", "pytest"]
-        else:
-            print_to_stderr(
-                "Pytest cannot be used for this test. Falling back to unittest."
-            )
     return executable
@@ -465,8 +458,9 @@ def run_test(

     # If using pytest, replace -f with equivalent -x
     if options.pytest:
+        unittest_args.extend(get_pytest_args(options))
         unittest_args = [arg if arg != "-f" else "-x" for arg in unittest_args]
-    elif IS_CI:
+    if IS_CI:
         ci_args = ["--import-slow-tests", "--import-disabled-tests"]
         if os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1":
             ci_args.append("--rerun-disabled-tests")
@@ -474,9 +468,7 @@ def run_test(
         unittest_args.extend(ci_args)

     # Extra arguments are not supported with pytest
-    executable = get_executable_command(
-        options, allow_pytest=not extra_unittest_args
-    )
+    executable = get_executable_command(options)

     # Can't call `python -m unittest test_*` here because it doesn't run code
     # in `if __name__ == '__main__': `. So call `python test_*.py` instead.
@@ -793,7 +785,7 @@ def print_log_file(test: str, file_path: str, failed: bool) -> None:
     print_to_stderr("")


-def run_test_ops(test_module, test_directory, options):
+def get_pytest_args(options):
     if os.getenv("PYTORCH_TEST_RERUN_DISABLED_TESTS", "0") == "1":
         # When under rerun-disabled-tests mode, run the same tests multiple times to determine their
         # flakiness status. Default to 50 re-runs
@@ -806,12 +798,16 @@ def run_test_ops(test_module, test_directory, options):
         # failure
         rerun_options = ["-x", "--reruns=2"]

-    default_unittest_args = [
+    pytest_args = [
+        "--use-pytest",
         "-vv",
         "-rfEX"
     ]
-    default_unittest_args.extend(rerun_options)
+    pytest_args.extend(rerun_options)
+    return pytest_args
+
+
+def run_test_ops(test_module, test_directory, options):
+    default_unittest_args = get_pytest_args(options)

     return_codes = []
     os.environ["NUM_PARALLEL_PROCS"] = str(NUM_PROCS)
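
For context on "--use-pytest" above: run_test.py does not consume it itself but passes it through to each test file, where common_utils.run_tests() dispatches to pytest. A simplified sketch of that dispatch (an assumption about the real implementation, which also handles XML output and other CI flags):

import sys


def run_tests(argv=None):
    # simplified stand-in for torch.testing._internal.common_utils.run_tests
    args = list(sys.argv[1:] if argv is None else argv)
    if "--use-pytest" in args:
        import pytest
        args.remove("--use-pytest")
        # re-run this test file under pytest with the remaining args
        sys.exit(pytest.main([sys.argv[0]] + args))
    import unittest
    unittest.main(argv=[sys.argv[0]] + args)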

View File

@@ -10,6 +10,8 @@ from typing import IO, Dict, List, Optional
 import pytest

+from torch.testing._internal.common_utils import run_tests
+
 try:
     from mypy import api
 except ImportError:
@@ -232,5 +234,5 @@ def _test_reveal(path: str, reveal: str, expected_reveal: str, lineno: int) -> N
     raise AssertionError(_REVEAL_MSG.format(lineno, expected_reveal, reveal))


-if __name__ == '__main__':
-    pytest.main([__file__])
+if __name__ == "__main__":
+    run_tests()