diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index ed99c5f51ca9..b12cc8c236e6 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -162,6 +162,11 @@ pytest-xdist==3.3.1
 #Pinned versions:
 #test that import:
 
+pytest-shard==0.1.2
+#Description: plugin for splitting up tests in pytest
+#Pinned versions:
+#test that import:
+
 pytest-flakefinder==1.1.0
 #Description: plugin for rerunning tests a fixed number of times in pytest
 #Pinned versions: 1.1.0
diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt
index 74dd02b4641f..9b6986287391 100644
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@@ -16,6 +16,7 @@ pytest==7.3.2
 pytest-xdist==3.3.1
 pytest-rerunfailures==10.3
 pytest-flakefinder==1.1.0
+pytest-shard==0.1.2
 scipy==1.10.1
 sympy==1.11.1
 unittest-xml-reporting<=3.2.0,>=2.0.0
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 4316b0c23996..351bf83efe11 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -225,7 +225,7 @@ jobs:
           cache: pip
       - name: Install dependencies
         run: |
-          pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
+          pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
           pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
       - name: Run run_test.py (nonretryable)
         run: |
diff --git a/pytest.ini b/pytest.ini
index b7e76081694e..b01291182032 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -8,8 +8,6 @@ addopts =
     --capture=sys
     # don't suppress warnings, but don't shove them all to the end either
     -p no:warnings
-    # Use custom pytest shard located in test/pytest_shard_custom.py instead
-    -p no:pytest-shard
    # don't rewrite assertions (usually not a problem in CI due to differences in imports, see #95844)
    --assert=plain
 testpaths =
diff --git a/test/conftest.py b/test/conftest.py
index c7a1570f0bf7..48393f5f1fb8 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -19,7 +19,6 @@ import copy
 import json
 import re
 from collections import defaultdict
-from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions
 
 # a lot of this file is copied from _pytest.junitxml and modified to get rerun info
 
@@ -85,7 +84,6 @@ def pytest_addoption(parser: Parser) -> None:
         "Emit XML for schema: one of legacy|xunit1|xunit2",
         default="xunit2",
     )
-    shard_addoptions(parser)
 
 
 def pytest_configure(config: Config) -> None:
@@ -107,8 +105,6 @@ def pytest_configure(config: Config) -> None:
         config.option.stepcurrent = config.getoption("stepcurrent_skip")
     if config.getoption("stepcurrent"):
         config.pluginmanager.register(StepcurrentPlugin(config), "stepcurrentplugin")
-    if config.getoption("num_shards"):
-        config.pluginmanager.register(PytestShardPlugin(config), "pytestshardplugin")
 
 
 def pytest_unconfigure(config: Config) -> None:
diff --git a/test/pytest_shard_custom.py b/test/pytest_shard_custom.py
deleted file mode 100644
index 2acec2ae1654..000000000000
--- a/test/pytest_shard_custom.py
+++ /dev/null
@@ -1,67 +0,0 @@
-"""
-Custom pytest shard plugin
-https://github.com/AdamGleave/pytest-shard/blob/64610a08dac6b0511b6d51cf895d0e1040d162ad/pytest_shard/pytest_shard.py#L1
-Modifications:
-* shards are now 1 indexed instead of 0 indexed
-* option for printing items in shard
-"""
-import hashlib
-
-from _pytest.config.argparsing import Parser
-
-
-def pytest_addoptions(parser: Parser):
-    """Add options to control sharding."""
-    group = parser.getgroup("shard")
-    group.addoption(
-        "--shard-id", dest="shard_id", type=int, default=1, help="Number of this shard."
-    )
-    group.addoption(
-        "--num-shards",
-        dest="num_shards",
-        type=int,
-        default=1,
-        help="Total number of shards.",
-    )
-    group.addoption(
-        "--print-items",
-        dest="print_items",
-        action="store_true",
-        default=False,
-        help="Print out the items being tested in this shard.",
-    )
-
-
-class PytestShardPlugin:
-    def __init__(self, config):
-        self.config = config
-
-    def pytest_report_collectionfinish(self, config, items) -> str:
-        """Log how many and which items are tested in this shard."""
-        msg = f"Running {len(items)} items in this shard"
-        if config.getoption("print_items"):
-            msg += ": " + ", ".join([item.nodeid for item in items])
-        return msg
-
-    def sha256hash(self, x: str) -> int:
-        return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")
-
-    def filter_items_by_shard(self, items, shard_id: int, num_shards: int):
-        """Computes `items` that should be tested in `shard_id` out of `num_shards` total shards."""
-        new_items = [
-            item
-            for item in items
-            if self.sha256hash(item.nodeid) % num_shards == shard_id - 1
-        ]
-        return new_items
-
-    def pytest_collection_modifyitems(self, config, items):
-        """Mutate the collection to consist of just items to be tested in this shard."""
-        shard_id = config.getoption("shard_id")
-        shard_total = config.getoption("num_shards")
-        if shard_id < 1 or shard_id > shard_total:
-            raise ValueError(
-                f"{shard_id} is not a valid shard ID out of {shard_total} total shards"
-            )
-
-        items[:] = self.filter_items_by_shard(items, shard_id, shard_total)
diff --git a/test/run_test.py b/test/run_test.py
index 5e714d1870e8..b53a23652407 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -520,11 +520,11 @@ def run_test(
     else:
         unittest_args.extend(
             [
-                f"--shard-id={test_module.shard}",
+                f"--shard-id={test_module.shard - 1}",
                 f"--num-shards={test_module.num_shards}",
             ]
         )
-        stepcurrent_key = f"{test_file}_{test_module.shard}_{os.urandom(8).hex()}"
+        stepcurrent_key = f"{test_file}_{test_module.shard - 1}_{os.urandom(8).hex()}"
 
     if options.verbose:
         unittest_args.append(f'-{"v"*options.verbose}')  # in case of pytest
@@ -543,6 +543,7 @@ def run_test(
         unittest_args.extend(
             get_pytest_args(
                 options,
+                stepcurrent_key,
                 is_cpp_test=is_cpp_test,
                 is_distributed_test=is_distributed_test,
             )
@@ -605,7 +606,7 @@ def run_test(
     os.close(log_fd)
 
     command = (launcher_cmd or []) + executable + argv
-    should_retry = "--subprocess" not in command and not RERUN_DISABLED_TESTS
+    num_retries = 0 if "--subprocess" in command or RERUN_DISABLED_TESTS else 2
     is_slow = "slow" in os.environ.get("TEST_CONFIG", "") or "slow" in os.environ.get(
         "BUILD_ENVRIONMENT", ""
     )
@@ -613,7 +614,7 @@ def run_test(
         THRESHOLD * 6
         if is_slow
         else THRESHOLD * 3
-        if should_retry
+        if num_retries > 0
         and isinstance(test_module, ShardedTest)
         and test_module.time is not None
         else None
@@ -625,18 +626,12 @@ def run_test(
         if IS_CI:
             output = stack.enter_context(open(log_path, "w"))
 
-        if should_retry:
-            ret_code, was_rerun = run_test_retries(
-                command,
-                test_directory,
-                env,
-                timeout,
-                stepcurrent_key,
-                output,
-                options.continue_through_error,
+        if options.continue_through_error and "--subprocess" not in command:
+            # I think subprocess is better handled by common_utils? but it's not working atm
+            ret_code, was_rerun = run_test_continue_through_error(
+                command, test_directory, env, timeout, stepcurrent_key, output
             )
         else:
-            command.extend([f"--sc={stepcurrent_key}", "--print-items"])
             ret_code, was_rerun = retry_shell(
                 command,
                 test_directory,
@@ -644,6 +639,7 @@ def run_test(
                 stderr=output,
                 env=env,
                 timeout=timeout,
+                retries=num_retries,
             )
 
     # Pytest return code 5 means no test is collected. This is needed
@@ -662,37 +658,23 @@ def run_test(
     return ret_code
 
 
-def run_test_retries(
-    command,
-    test_directory,
-    env,
-    timeout,
-    stepcurrent_key,
-    output,
-    continue_through_error,
+def run_test_continue_through_error(
+    command, test_directory, env, timeout, stepcurrent_key, output
 ):
     # Run the test with -x to stop at first failure. Try again, skipping the
     # previously run tests, repeating this until there is a test that fails 3
-    # times (same number of retries we typically give).
-    #
-    # If continue through error is not set, then we fail fast.
-    #
-    # If continue through error is set, then we skip that test, and keep going.
-    # Basically if the same test fails 3 times in a row, skip the test on the
-    # next run, but still fail in the end. I take advantage of the value saved
-    # in stepcurrent to keep track of the most recently run test (which is the
-    # one that failed if there was a failure).
-
-    def print_to_file(s):
-        print(s, file=output, flush=True)
+    # times (same number of retries we typically give). Then we skip that
+    # test, and keep going. Basically if the same test fails 3 times in a row,
+    # skip the test on the next run, but still fail in the end. I take advantage
+    # of the value saved in stepcurrent to keep track of the most recently run
+    # test (which is the one that failed if there was a failure).
 
     num_failures = defaultdict(int)
-    print_items = ["--print-items"]
     sc_command = f"--sc={stepcurrent_key}"
     while True:
         ret_code = shell(
-            command + [sc_command] + print_items,
+            command + [sc_command],
             test_directory,
             stdout=output,
             stderr=output,
@@ -703,7 +685,11 @@ def run_test_continue_through_error(
         if ret_code == 0:
             break  # Got to the end of the test suite successfully
         signal_name = f" ({SIGNALS_TO_NAMES_DICT[-ret_code]})" if ret_code < 0 else ""
-        print_to_file(f"Got exit code {ret_code}{signal_name}")
+        print(
+            f"Got exit code {ret_code}{signal_name}, retrying...",
+            file=output,
+            flush=True,
+        )
 
         # Read what just failed
         with open(
@@ -713,24 +699,25 @@ def run_test_continue_through_error(
             num_failures[current_failure] += 1
 
         if num_failures[current_failure] >= 3:
-            if not continue_through_error:
-                print_to_file("Stopping at first consistent failure")
-                break
             sc_command = f"--scs={stepcurrent_key}"
         else:
             sc_command = f"--sc={stepcurrent_key}"
-        print_to_file("Retrying...")
-        print_items = []  # do not continue printing them, massive waste of space
 
     consistent_failures = [x[1:-1] for x in num_failures.keys() if num_failures[x] >= 3]
     flaky_failures = [x[1:-1] for x in num_failures.keys() if 0 < num_failures[x] < 3]
     if len(flaky_failures) > 0:
-        print_to_file(
-            "The following tests failed and then succeeded when run in a new process"
-            + f"{flaky_failures}",
+        print(
+            "The following tests failed flakily (had to be rerun in a new process,"
+            + f" doesn't include reruns from same process): {flaky_failures}",
+            file=output,
+            flush=True,
         )
     if len(consistent_failures) > 0:
-        print_to_file(f"The following tests failed consistently: {consistent_failures}")
+        print(
+            f"The following tests failed consistently: {consistent_failures}",
+            file=output,
+            flush=True,
+        )
         return 1, True
     return 0, any(x > 0 for x in num_failures.values())
 
@@ -1056,7 +1043,9 @@ def handle_log_file(
         os.remove(file_path)
 
 
-def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
+def get_pytest_args(
+    options, stepcurrent_key, is_cpp_test=False, is_distributed_test=False
+):
     if RERUN_DISABLED_TESTS:
         # Distributed tests are too slow, so running them x50 will cause the jobs to timeout after
         # 3+ hours. So, let's opt for less number of reruns. We need at least 150 instances of the
@@ -1077,8 +1066,9 @@ def get_pytest_args(
     ]
     if not is_cpp_test:
         # C++ tests need to be run with pytest directly, not via python
-        # We have a custom pytest shard that conflicts with the normal plugin
         pytest_args.extend(["-p", "no:xdist", "--use-pytest"])
+        if not options.continue_through_error and IS_CI:
+            pytest_args.append(f"--sc={stepcurrent_key}")
     else:
         # Use pytest-xdist to run C++ tests in parallel as running them sequentially using run_test
         # is much slower than running them directly
@@ -1651,6 +1641,7 @@ def run_tests(
 def check_pip_packages() -> None:
     packages = [
         "pytest-rerunfailures",
+        "pytest-shard",
         "pytest-flakefinder",
         "pytest-xdist",
     ]
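
Note on the shard-indexing change in run_test.py above: run_test.py's ShardedTest numbering stays 1-indexed, while upstream pytest-shard expects 0-indexed IDs (per its docstring, the deleted test/pytest_shard_custom.py fork's only behavioral change besides item printing was 1-indexing), hence the new `test_module.shard - 1`. Below is a minimal sketch of why shifting the ID by one selects an identical set of tests; it reuses the sha256 hashing shown in the deleted file, and the `nodeids` list is purely illustrative.

```python
import hashlib

def sha256hash(x: str) -> int:
    # Shared by both plugins: read the sha256 digest as a little-endian int.
    return int.from_bytes(hashlib.sha256(x.encode()).digest(), "little")

def custom_shard(nodeids, shard_id, num_shards):
    # Deleted test/pytest_shard_custom.py: shard IDs run from 1 to num_shards.
    return [n for n in nodeids if sha256hash(n) % num_shards == shard_id - 1]

def upstream_shard(nodeids, shard_id, num_shards):
    # pytest-shard 0.1.2: shard IDs run from 0 to num_shards - 1.
    return [n for n in nodeids if sha256hash(n) % num_shards == shard_id]

# Illustrative node IDs; every 1-indexed shard k matches upstream shard k - 1.
nodeids = [f"test_example.py::test_{i}" for i in range(20)]
for k in range(1, 4):
    assert custom_shard(nodeids, k, 3) == upstream_shard(nodeids, k - 1, 3)
```

On the command line this matches how run_test.py now invokes the plugin, e.g. `pytest --num-shards=3 --shard-id=0 ...` for the first shard.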
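
Similarly, since the comment in run_test_continue_through_error is dense, here is a rough sketch of the control flow it restores. The `--sc`/`--scs` flags belong to PyTorch's custom stepcurrent plugin in test/conftest.py (resume after vs. skip the saved test); `run_suite` and `read_last_test` below are hypothetical stand-ins for shelling out to pytest and reading the stepcurrent cache, not real APIs.

```python
from collections import defaultdict

def retry_loop(run_suite, read_last_test):
    # run_suite(skip_last) runs the suite with -x plus --sc (resume after the
    # saved test) or --scs (skip it); read_last_test() returns the test that
    # stepcurrent recorded, i.e. the most recently run (failed) one.
    num_failures = defaultdict(int)
    skip_last = False
    while True:
        if run_suite(skip_last) == 0:
            break  # reached the end of the suite
        failed = read_last_test()
        num_failures[failed] += 1
        # Once the same test has failed three times, skip it on the next pass
        # so the rest of the suite still runs.
        skip_last = num_failures[failed] >= 3
    # The run continues through errors but still reports failure at the end.
    return 1 if any(n >= 3 for n in num_failures.values()) else 0
```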