Revert "Reduce pytest prints (#117069)"

This reverts commit 40dbd567e04483c671f9c897171bf9d1e7162b68.

Reverted https://github.com/pytorch/pytorch/pull/117069 on behalf of https://github.com/clee2000 due to the need to handle timeout expired better ([comment](https://github.com/pytorch/pytorch/pull/117069#issuecomment-1901270953))
PyTorch MergeBot
2024-01-19 23:07:51 +00:00
parent 5538b37a06
commit f684e44fd6
7 changed files with 46 additions and 122 deletions

View File

@@ -162,6 +162,11 @@ pytest-xdist==3.3.1
#Pinned versions:
#test that import:
pytest-shard==0.1.2
#Description: plugin for splitting up tests in pytest
#Pinned versions:
#test that import:
pytest-flakefinder==1.1.0
#Description: plugin for rerunning tests a fixed number of times in pytest
#Pinned versions: 1.1.0

View File

@@ -16,6 +16,7 @@ pytest==7.3.2
pytest-xdist==3.3.1
pytest-rerunfailures==10.3
pytest-flakefinder==1.1.0
pytest-shard==0.1.2
scipy==1.10.1
sympy==1.11.1
unittest-xml-reporting<=3.2.0,>=2.0.0

View File

@@ -225,7 +225,7 @@ jobs:
cache: pip
- name: Install dependencies
run: |
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)
run: |

View File

@@ -8,8 +8,6 @@ addopts =
--capture=sys
# don't suppress warnings, but don't shove them all to the end either
-p no:warnings
# Use custom pytest shard located in test/pytest_shard_custom.py instead
-p no:pytest-shard
# don't rewrite assertions (usually not a problem in CI due to differences in imports, see #95844)
--assert=plain
testpaths =

View File

@@ -19,7 +19,6 @@ import copy
import json
import re
from collections import defaultdict
from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions
# a lot of this file is copied from _pytest.junitxml and modified to get rerun info
@@ -85,7 +84,6 @@ def pytest_addoption(parser: Parser) -> None:
"Emit XML for schema: one of legacy|xunit1|xunit2",
default="xunit2",
)
shard_addoptions(parser)
def pytest_configure(config: Config) -> None:
@@ -107,8 +105,6 @@ def pytest_configure(config: Config) -> None:
config.option.stepcurrent = config.getoption("stepcurrent_skip")
if config.getoption("stepcurrent"):
config.pluginmanager.register(StepcurrentPlugin(config), "stepcurrentplugin")
if config.getoption("num_shards"):
config.pluginmanager.register(PytestShardPlugin(config), "pytestshardplugin")
def pytest_unconfigure(config: Config) -> None:

View File

@@ -1,67 +0,0 @@
"""
Custom pytest shard plugin
https://github.com/AdamGleave/pytest-shard/blob/64610a08dac6b0511b6d51cf895d0e1040d162ad/pytest_shard/pytest_shard.py#L1
Modifications:
* shards are now 1 indexed instead of 0 indexed
* option for printing items in shard
"""
import hashlib
from _pytest.config.argparsing import Parser
def pytest_addoptions(parser: Parser):
"""Add options to control sharding."""
group = parser.getgroup("shard")
group.addoption(
"--shard-id", dest="shard_id", type=int, default=1, help="Number of this shard."
)
group.addoption(
"--num-shards",
dest="num_shards",
type=int,
default=1,
help="Total number of shards.",
)
group.addoption(
"--print-items",
dest="print_items",
action="store_true",
default=False,
help="Print out the items being tested in this shard.",
)
class PytestShardPlugin:
def __init__(self, config):
self.config = config
def pytest_report_collectionfinish(self, config, items) -> str:
"""Log how many and which items are tested in this shard."""
msg = f"Running {len(items)} items in this shard"
if config.getoption("print_items"):
msg += ": " + ", ".join([item.nodeid for item in items])
return msg
def sha256hash(self, x: str) -> int:
return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")
def filter_items_by_shard(self, items, shard_id: int, num_shards: int):
"""Computes `items` that should be tested in `shard_id` out of `num_shards` total shards."""
new_items = [
item
for item in items
if self.sha256hash(item.nodeid) % num_shards == shard_id - 1
]
return new_items
def pytest_collection_modifyitems(self, config, items):
"""Mutate the collection to consist of just items to be tested in this shard."""
shard_id = config.getoption("shard_id")
shard_total = config.getoption("num_shards")
if shard_id < 1 or shard_id > shard_total:
raise ValueError(
f"{shard_id} is not a valid shard ID out of {shard_total} total shards"
)
items[:] = self.filter_items_by_shard(items, shard_id, shard_total)
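
For reference, a minimal standalone sketch of the hash-based sharding that the custom plugin (deleted here) implements: each test's node ID is hashed with SHA-256 and assigned to one of N shards, with shard IDs 1-indexed. The test node IDs below are made up for illustration.

import hashlib

def sha256hash(x: str) -> int:
    # Stable hash: the SHA-256 digest of the node ID, read as a little-endian int.
    return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")

def filter_items_by_shard(items, shard_id: int, num_shards: int):
    # Keep only the items that fall into this (1-indexed) shard.
    return [i for i in items if sha256hash(i) % num_shards == shard_id - 1]

tests = ["test_a.py::test_one", "test_a.py::test_two", "test_b.py::test_three"]
print(filter_items_by_shard(tests, shard_id=1, num_shards=2))
print(filter_items_by_shard(tests, shard_id=2, num_shards=2))

Every shard applies the same deterministic filter, so running all shards covers each collected test exactly once.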

View File

@@ -520,11 +520,11 @@ def run_test(
else:
unittest_args.extend(
[
f"--shard-id={test_module.shard}",
f"--shard-id={test_module.shard - 1}",
f"--num-shards={test_module.num_shards}",
]
)
stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"
stepcurrent_key = f"{test_file}_{test_module.shard - 1}_{os.urandom(8).hex()}"
if options.verbose:
unittest_args.append(f'-{"v"*options.verbose}') # in case of pytest
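
For clarity, the `- 1` restored above appears to exist because run_test.py numbers shards starting at 1 while the pytest-shard plugin being reinstated expects 0-indexed shard IDs (the custom plugin deleted above had been changed to 1-indexed). A hypothetical stand-in object illustrates the flags that get built; FakeShardedTest is not part of run_test.py.

# Hypothetical stand-in for the sharded-test object run_test.py uses.
class FakeShardedTest:
    shard = 2        # run_test.py shard numbers are assumed to start at 1
    num_shards = 3

test_module = FakeShardedTest()
unittest_args = [
    f"--shard-id={test_module.shard - 1}",   # pytest-shard expects 0-indexed IDs
    f"--num-shards={test_module.num_shards}",
]
print(unittest_args)  # ['--shard-id=1', '--num-shards=3']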
@@ -543,6 +543,7 @@ def run_test(
unittest_args.extend(
get_pytest_args(
options,
stepcurrent_key,
is_cpp_test=is_cpp_test,
is_distributed_test=is_distributed_test,
)
@@ -605,7 +606,7 @@ def run_test(
os.close(log_fd)
command = (launcher_cmd or []) + executable + argv
should_retry = "--subprocess" not in command and not RERUN_DISABLED_TESTS
num_retries = 0 if "--subprocess" in command or RERUN_DISABLED_TESTS else 2
is_slow = "slow" in os.environ.get("TEST_CONFIG", "") or "slow" in os.environ.get(
"BUILD_ENVRIONMENT", ""
)
@@ -613,7 +614,7 @@ def run_test(
THRESHOLD * 6
if is_slow
else THRESHOLD * 3
if should_retry
if num_retries > 0
and isinstance(test_module, ShardedTest)
and test_module.time is not None
else None
@@ -625,18 +626,12 @@ def run_test(
if IS_CI:
output = stack.enter_context(open(log_path, "w"))
if should_retry:
ret_code, was_rerun = run_test_retries(
command,
test_directory,
env,
timeout,
stepcurrent_key,
output,
options.continue_through_error,
if options.continue_through_error and "--subprocess" not in command:
# I think subprocess is better handled by common_utils? but it's not working atm
ret_code, was_rerun = run_test_continue_through_error(
command, test_directory, env, timeout, stepcurrent_key, output
)
else:
command.extend([f"--sc={stepcurrent_key}", "--print-items"])
ret_code, was_rerun = retry_shell(
command,
test_directory,
@@ -644,6 +639,7 @@ def run_test(
stderr=output,
env=env,
timeout=timeout,
retries=num_retries,
)
# Pytest return code 5 means no test is collected. This is needed
@@ -662,37 +658,23 @@ def run_test(
return ret_code
def run_test_retries(
command,
test_directory,
env,
timeout,
stepcurrent_key,
output,
continue_through_error,
def run_test_continue_through_error(
command, test_directory, env, timeout, stepcurrent_key, output
):
# Run the test with -x to stop at first failure. Try again, skipping the
# previously run tests, repeating this until there is a test that fails 3
# times (same number of retries we typically give).
#
# If continue through error is not set, then we fail fast.
#
# If continue through error is set, then we skip that test, and keep going.
# Basically if the same test fails 3 times in a row, skip the test on the
# next run, but still fail in the end. I take advantage of the value saved
# in stepcurrent to keep track of the most recently run test (which is the
# one that failed if there was a failure).
def print_to_file(s):
print(s, file=output, flush=True)
# times (same number of retries we typically give). Then we skip that
# test, and keep going. Basically if the same test fails 3 times in a row,
# skip the test on the next run, but still fail in the end. I take advantage
# of the value saved in stepcurrent to keep track of the most recently run
# test (which is the one that failed if there was a failure).
num_failures = defaultdict(int)
print_items = ["--print-items"]
sc_command = f"--sc={stepcurrent_key}"
while True:
ret_code = shell(
command + [sc_command] + print_items,
command + [sc_command],
test_directory,
stdout=output,
stderr=output,
@@ -703,7 +685,11 @@ def run_test_retries(
if ret_code == 0:
break # Got to the end of the test suite successfully
signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
print_to_file(f"Got exit code {ret_code}{signal_name}")
print(
f"Got exit code {ret_code}{signal_name}, retrying...",
file=output,
flush=True,
)
# Read what just failed
with open(
@@ -713,24 +699,25 @@ def run_test_retries(
num_failures[current_failure] += 1
if num_failures[current_failure] >= 3:
if not continue_through_error:
print_to_file("Stopping at first consistent failure")
break
sc_command = f"--scs={stepcurrent_key}"
else:
sc_command = f"--sc={stepcurrent_key}"
print_to_file("Retrying...")
print_items = [] # do not continue printing them, massive waste of space
consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
if len(flaky_failures) > 0:
print_to_file(
"The following tests failed and then succeeded when run in a new process"
+ f"{flaky_failures}",
print(
"The following tests failed flakily (had to be rerun in a new process,"
+ f" doesn't include reruns from the same process): {flaky_failures}",
file=output,
flush=True,
)
if len(consistent_failures) > 0:
print_to_file(f"The following tests failed consistently: {consistent_failures}")
print(
f"The following tests failed consistently: {consistent_failures}",
file=output,
flush=True,
)
return 1, True
return 0, any(x > 0 for x in num_failures.values())
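
As context for the hunk above, a simplified standalone sketch of the retry loop being restored: run the suite, and on a failure rerun from the most recently run test; once the same test has failed three times, skip it and keep going, but still fail the overall run at the end. The run_chunk callable and its return convention are placeholders, not the real run_test.py interface (which drives this through the stepcurrent --sc/--scs flags).

from collections import defaultdict

def retry_until_done(run_chunk, max_same_failures=3):
    # run_chunk(skip) runs the remaining tests, skipping the ids in `skip`, and
    # returns (exit_code, last_test_id); exit code 0 means everything left passed.
    num_failures = defaultdict(int)
    skip = set()
    while True:
        ret_code, failed_test = run_chunk(skip)
        if ret_code == 0:
            break  # reached the end of the suite
        num_failures[failed_test] += 1
        if num_failures[failed_test] >= max_same_failures:
            # Same test failed repeatedly in fresh processes: skip it from now
            # on, but remember that the overall run must still fail.
            skip.add(failed_test)
    consistent = [t for t, n in num_failures.items() if n >= max_same_failures]
    if consistent:
        return 1, True
    return 0, any(n > 0 for n in num_failures.values())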
@@ -1056,7 +1043,9 @@ def handle_log_file(
os.remove(file_path)
def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
def get_pytest_args(
options, stepcurrent_key, is_cpp_test=False, is_distributed_test=False
):
if RERUN_DISABLED_TESTS:
# Distributed tests are too slow, so running them x50 will cause the jobs to timeout after
# 3+ hours. So, let's opt for fewer reruns. We need at least 150 instances of the
@@ -1077,8 +1066,9 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
]
if not is_cpp_test:
# C++ tests need to be run with pytest directly, not via python
# We have a custom pytest shard that conflicts with the normal plugin
pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
if not options.continue_through_error and IS_CI:
pytest_args.append(f"--sc={stepcurrent_key}")
else:
# Use pytest-xdist to run C++ tests in parallel as running them sequentially using run_test
# is much slower than running them directly
@@ -1651,6 +1641,7 @@ def run_tests(
def check_pip_packages() -> None:
packages = [
"pytest-rerunfailures",
"pytest-shard",
"pytest-flakefinder",
"pytest-xdist",
]