diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index b12cc8c236e6..ed99c5f51ca9 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -162,11 +162,6 @@ pytest-xdist==3.3.1
 #Pinned versions:
 #test that import:

-pytest-shard==0.1.2
-#Description: plugin spliting up tests in pytest
-#Pinned versions:
-#test that import:
-
 pytest-flakefinder==1.1.0
 #Description: plugin for rerunning tests a fixed number of times in pytest
 #Pinned versions: 1.1.0
diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt
index 9b6986287391..74dd02b4641f 100644
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@@ -16,7 +16,6 @@ pytest==7.3.2
 pytest-xdist==3.3.1
 pytest-rerunfailures==10.3
 pytest-flakefinder==1.1.0
-pytest-shard==0.1.2
 scipy==1.10.1
 sympy==1.11.1
 unittest-xml-reporting<=3.2.0,>=2.0.0
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 351bf83efe11..4316b0c23996 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -225,7 +225,7 @@ jobs:
           cache: pip
       - name: Install dependencies
         run: |
-          pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
+          pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
           pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
       - name: Run run_test.py (nonretryable)
         run: |
diff --git a/pytest.ini b/pytest.ini
index b01291182032..b7e76081694e 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -8,6 +8,8 @@ addopts =
     --capture=sys
     # don't suppress warnings, but don't shove them all to the end either
     -p no:warnings
+    # Use custom pytest shard located in test/pytest_shard_custom.py instead
+    -p no:pytest-shard
     # don't rewrite assertions (usually not a problem in CI due to differences in imports, see #95844)
     --assert=plain
 testpaths =
diff --git a/test/conftest.py b/test/conftest.py
index 48393f5f1fb8..c7a1570f0bf7 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -19,6 +19,7 @@ import copy
 import json
 import re
 from collections import defaultdict
+from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions

 # a lot of this file is copied from _pytest.junitxml and modified to get rerun info

@@ -84,6 +85,7 @@ def pytest_addoption(parser: Parser) -> None:
         "Emit XML for schema: one of legacy|xunit1|xunit2",
         default="xunit2",
     )
+    shard_addoptions(parser)


 def pytest_configure(config: Config) -> None:
@@ -105,6 +107,8 @@ def pytest_configure(config: Config) -> None:
         config.option.stepcurrent = config.getoption("stepcurrent_skip")
     if config.getoption("stepcurrent"):
         config.pluginmanager.register(StepcurrentPlugin(config), "stepcurrentplugin")
+    if config.getoption("num_shards"):
+        config.pluginmanager.register(PytestShardPlugin(config), "pytestshardplugin")


 def pytest_unconfigure(config: Config) -> None:
diff --git a/test/pytest_shard_custom.py b/test/pytest_shard_custom.py
new file mode 100644
index 000000000000..2acec2ae1654
--- /dev/null
+++ b/test/pytest_shard_custom.py
@@ -0,0 +1,67 @@
+"""
+Custom pytest shard plugin
+https://github.com/AdamGleave/pytest-shard/blob/64610a08dac6b0511b6d51cf895d0e1040d162ad/pytest_shard/pytest_shard.py#L1
+Modifications:
+* shards are now 1 indexed instead of 0 indexed
+* option for printing items in shard
+"""
+import hashlib
+
+from _pytest.config.argparsing import Parser
+
+
+def pytest_addoptions(parser: Parser):
+    """Add options to control sharding."""
+    group = parser.getgroup("shard")
+    group.addoption(
+        "--shard-id", dest="shard_id", type=int, default=1, help="Number of this shard."
+    )
+    group.addoption(
+        "--num-shards",
+        dest="num_shards",
+        type=int,
+        default=1,
+        help="Total number of shards.",
+    )
+    group.addoption(
+        "--print-items",
+        dest="print_items",
+        action="store_true",
+        default=False,
+        help="Print out the items being tested in this shard.",
+    )
+
+
+class PytestShardPlugin:
+    def __init__(self, config):
+        self.config = config
+
+    def pytest_report_collectionfinish(self, config, items) -> str:
+        """Log how many and which items are tested in this shard."""
+        msg = f"Running {len(items)} items in this shard"
+        if config.getoption("print_items"):
+            msg += ": " + ", ".join([item.nodeid for item in items])
+        return msg
+
+    def sha256hash(self, x: str) -> int:
+        return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")
+
+    def filter_items_by_shard(self, items, shard_id: int, num_shards: int):
+        """Computes `items` that should be tested in `shard_id` out of `num_shards` total shards."""
+        new_items = [
+            item
+            for item in items
+            if self.sha256hash(item.nodeid) % num_shards == shard_id - 1
+        ]
+        return new_items
+
+    def pytest_collection_modifyitems(self, config, items):
+        """Mutate the collection to consist of just items to be tested in this shard."""
+        shard_id = config.getoption("shard_id")
+        shard_total = config.getoption("num_shards")
+        if shard_id < 1 or shard_id > shard_total:
+            raise ValueError(
+                f"{shard_id} is not a valid shard ID out of {shard_total} total shards"
+            )
+
+        items[:] = self.filter_items_by_shard(items, shard_id, shard_total)
diff --git a/test/run_test.py b/test/run_test.py
index b53a23652407..5e714d1870e8 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -520,11 +520,11 @@ def run_test(
     else:
         unittest_args.extend(
             [
-                f"--shard-id={test_module.shard - 1}",
+                f"--shard-id={test_module.shard}",
                 f"--num-shards={test_module.num_shards}",
             ]
         )
-        stepcurrent_key = f"{test_file}_{test_module.shard - 1}_{os.urandom(8).hex()}"
+        stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"

     if options.verbose:
         unittest_args.append(f'-{"v"*options.verbose}')  # in case of pytest
@@ -543,7 +543,6 @@ def run_test(
         unittest_args.extend(
             get_pytest_args(
                 options,
-                stepcurrent_key,
                 is_cpp_test=is_cpp_test,
                 is_distributed_test=is_distributed_test,
             )
@@ -606,7 +605,7 @@ def run_test(
         os.close(log_fd)

     command = (launcher_cmd or []) + executable + argv
-    num_retries = 0 if "--subprocess" in command or RERUN_DISABLED_TESTS else 2
+    should_retry = "--subprocess" not in command and not RERUN_DISABLED_TESTS
     is_slow = "slow" in os.environ.get("TEST_CONFIG", "") or "slow" in os.environ.get(
         "BUILD_ENVRIONMENT", ""
     )
@@ -614,7 +613,7 @@ def run_test(
         THRESHOLD * 6
         if is_slow
         else THRESHOLD * 3
-        if num_retries > 0
+        if should_retry
         and isinstance(test_module, ShardedTest)
         and test_module.time is not None
         else None
@@ -626,12 +625,18 @@ def run_test(
         if IS_CI:
             output = stack.enter_context(open(log_path, "w"))

-        if options.continue_through_error and "--subprocess" not in command:
-            # I think subprocess is better handled by common_utils? but it's not working atm
-            ret_code, was_rerun = run_test_continue_through_error(
-                command, test_directory, env, timeout, stepcurrent_key, output
+        if should_retry:
+            ret_code, was_rerun = run_test_retries(
+                command,
+                test_directory,
+                env,
+                timeout,
+                stepcurrent_key,
+                output,
+                options.continue_through_error,
             )
         else:
+            command.extend([f"--sc={stepcurrent_key}", "--print-items"])
             ret_code, was_rerun = retry_shell(
                 command,
                 test_directory,
@@ -639,7 +644,6 @@ def run_test(
                 stderr=output,
                 env=env,
                 timeout=timeout,
-                retries=num_retries,
             )

         # Pytest return code 5 means no test is collected. This is needed
@@ -658,23 +662,37 @@ def run_test(
     return ret_code


-def run_test_continue_through_error(
-    command, test_directory, env, timeout, stepcurrent_key, output
+def run_test_retries(
+    command,
+    test_directory,
+    env,
+    timeout,
+    stepcurrent_key,
+    output,
+    continue_through_error,
 ):
     # Run the test with -x to stop at first failure. Try again, skipping the
     # previously run tests, repeating this until there is a test that fails 3
-    # times (same number of rVetries we typically give). Then we skip that
-    # test, and keep going. Basically if the same test fails 3 times in a row,
-    # skip the test on the next run, but still fail in the end. I take advantage
-    # of the value saved in stepcurrent to keep track of the most recently run
-    # test (which is the one that failed if there was a failure).
+    # times (same number of retries we typically give).
+    #
+    # If continue through error is not set, then we fail fast.
+    #
+    # If continue through error is set, then we skip that test, and keep going.
+    # Basically if the same test fails 3 times in a row, skip the test on the
+    # next run, but still fail in the end. I take advantage of the value saved
+    # in stepcurrent to keep track of the most recently run test (which is the
+    # one that failed if there was a failure).
+
+    def print_to_file(s):
+        print(s, file=output, flush=True)
+
     num_failures = defaultdict(int)
+    print_items = ["--print-items"]
     sc_command = f"--sc={stepcurrent_key}"
     while True:
         ret_code = shell(
-            command + [sc_command],
+            command + [sc_command] + print_items,
             test_directory,
             stdout=output,
             stderr=output,
@@ -685,11 +703,7 @@ def run_test_continue_through_error(
         if ret_code == 0:
             break  # Got to the end of the test suite successfully
         signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
-        print(
-            f"Got exit code {ret_code}{signal_name}, retrying...",
-            file=output,
-            flush=True,
-        )
+        print_to_file(f"Got exit code {ret_code}{signal_name}")

         # Read what just failed
         with open(
@@ -699,25 +713,24 @@ def run_test_continue_through_error(
             num_failures[current_failure] += 1

         if num_failures[current_failure] >= 3:
+            if not continue_through_error:
+                print_to_file("Stopping at first consistent failure")
+                break
             sc_command = f"--scs={stepcurrent_key}"
         else:
             sc_command = f"--sc={stepcurrent_key}"
+        print_to_file("Retrying...")
+        print_items = []  # do not continue printing them, massive waste of space

     consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
     flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
     if len(flaky_failures) > 0:
-        print(
-            "The following tests failed flakily (had to be rerun in a new process,"
-            + f" doesn't include reruns froms same process): {flaky_failures}",
-            file=output,
-            flush=True,
+        print_to_file(
+            "The following tests failed and then succeeded when run in a new process: "
+            + f"{flaky_failures}",
         )
     if len(consistent_failures) > 0:
-        print(
-            f"The following tests failed consistently: {consistent_failures}",
-            file=output,
-            flush=True,
-        )
+        print_to_file(f"The following tests failed consistently: {consistent_failures}")
         return 1, True
     return 0, any(x > 0 for x in num_failures.values())
@@ -1043,9 +1056,7 @@ def handle_log_file(
     os.remove(file_path)


-def get_pytest_args(
-    options, stepcurrent_key, is_cpp_test=False, is_distributed_test=False
-):
+def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
     if RERUN_DISABLED_TESTS:
         # Distributed tests are too slow, so running them x50 will cause the jobs to timeout after
         # 3+ hours. So, let's opt for less number of reruns. We need at least 150 instances of the
@@ -1066,9 +1077,8 @@ def get_pytest_args(
     ]
     if not is_cpp_test:
         # C++ tests need to be run with pytest directly, not via python
+        # We have a custom pytest shard that conflicts with the normal plugin
         pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
-        if not options.continue_through_error and IS_CI:
-            pytest_args.append(f"--sc={stepcurrent_key}")
     else:
         # Use pytext-dist to run C++ tests in parallel as running them sequentially using run_test
         # is much slower than running them directly
@@ -1641,7 +1651,6 @@ def run_tests(
 def check_pip_packages() -> None:
     packages = [
         "pytest-rerunfailures",
-        "pytest-shard",
         "pytest-flakefinder",
         "pytest-xdist",
     ]
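
A minimal standalone sketch (not part of the diff) of the hash-based partitioning that test/pytest_shard_custom.py relies on: every test nodeid hashes into exactly one of the 1-indexed shards, so running all shards covers the whole suite with no overlap. The nodeids below are made up for illustration.

    import hashlib

    def sha256hash(x: str) -> int:
        # Same hashing scheme as PytestShardPlugin.sha256hash
        return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")

    def shard_of(nodeid: str, num_shards: int) -> int:
        # 1-indexed shard that would collect this nodeid
        return sha256hash(nodeid) % num_shards + 1

    # Hypothetical nodeids, purely for illustration
    nodeids = [f"test/test_example.py::test_case_{i}" for i in range(12)]
    num_shards = 3
    for shard_id in range(1, num_shards + 1):
        selected = [n for n in nodeids if shard_of(n, num_shards) == shard_id]
        print(f"shard {shard_id}/{num_shards}: {len(selected)} items")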
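
As a rough usage sketch (again not part of the diff, and assuming it is run from the test/ directory so that conftest.py and pytest_shard_custom.py are importable), the new options registered by conftest.py can be exercised directly; the target file name is only a placeholder.

    import pytest

    # Run shard 2 of 4; --print-items logs which nodeids landed in this shard.
    ret = pytest.main(
        [
            "test_placeholder.py",  # placeholder; any collected test file works
            "--num-shards=4",
            "--shard-id=2",
            "--print-items",
        ]
    )
    print("pytest exit code:", ret)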